Preserve verbatim URLs (#639)

## Summary

This PR adds a `VerbatimUrl` struct to preserve verbatim URLs throughout
the resolution and installation pipeline. In short, alongside the parsed
`Url`, we also keep the URL as written by the user. This enables us to
display the URL exactly as written by the user, rather than the
serialized path that we use internally.

This will be especially useful once we start expanding environment
variables since, at that point, we'll be able to write the version of
the URL that includes the _unexpanded_ environment variable to the
output file.
This commit is contained in:
Charlie Marsh 2023-12-14 10:03:39 -05:00 committed by GitHub
parent eef9612719
commit ed8dfbfcf7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 255 additions and 96 deletions

View file

@ -35,7 +35,6 @@ use pyo3::{
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use thiserror::Error;
use unicode_width::UnicodeWidthStr;
use url::Url;
pub use marker::{
MarkerEnvironment, MarkerExpression, MarkerOperator, MarkerTree, MarkerValue,
@ -43,8 +42,10 @@ pub use marker::{
};
use pep440_rs::{Version, VersionSpecifier, VersionSpecifiers};
use puffin_normalize::{ExtraName, PackageName};
pub use verbatim_url::VerbatimUrl;
mod marker;
mod verbatim_url;
/// Error with a span attached. Note that those aren't `String` but `Vec<char>` indices.
#[derive(Debug, Clone, Eq, PartialEq)]
@ -62,12 +63,12 @@ pub struct Pep508Error {
/// Either we have an error string from our parser or an upstream error from `url`
#[derive(Debug, Error, Clone, Eq, PartialEq)]
pub enum Pep508ErrorSource {
/// An error from our parser
/// An error from our parser.
#[error("{0}")]
String(String),
/// A url parsing error
/// A URL parsing error.
#[error(transparent)]
UrlError(#[from] url::ParseError),
UrlError(#[from] verbatim_url::Error),
}
impl Display for Pep508Error {
@ -397,7 +398,7 @@ pub enum VersionOrUrl {
/// A PEP 440 version specifier set
VersionSpecifier(VersionSpecifiers),
/// An installable URL
Url(Url),
Url(VerbatimUrl),
}
/// A `Vec<char>` and an index inside of it. Like [String], but with utf-8 aware indexing
@ -678,7 +679,7 @@ fn parse_url(chars: &mut CharIter) -> Result<VersionOrUrl, Pep508Error> {
input: chars.copy_chars(),
});
}
let url = Url::parse(&url).map_err(|err| Pep508Error {
let url = VerbatimUrl::parse(url).map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(err),
start,
len,
@ -888,7 +889,6 @@ mod tests {
use std::str::FromStr;
use indoc::indoc;
use url::Url;
use pep440_rs::{Operator, Version, VersionSpecifier};
use puffin_normalize::{ExtraName, PackageName};
@ -897,7 +897,7 @@ mod tests {
parse_markers_impl, MarkerExpression, MarkerOperator, MarkerTree, MarkerValue,
MarkerValueString, MarkerValueVersion,
};
use crate::{CharIter, Requirement, VersionOrUrl};
use crate::{CharIter, Requirement, VerbatimUrl, VersionOrUrl};
fn assert_err(input: &str, error: &str) {
assert_eq!(Requirement::from_str(input).unwrap_err().to_string(), error);
@ -1151,7 +1151,7 @@ mod tests {
name: PackageName::from_str("pip").unwrap(),
extras: None,
marker: None,
version_or_url: Some(VersionOrUrl::Url(Url::parse(url).unwrap())),
version_or_url: Some(VersionOrUrl::Url(VerbatimUrl::from_str(url).unwrap())),
};
assert_eq!(pip_url, expected);
}

View file

@ -0,0 +1,75 @@
use std::ops::Deref;
use url::Url;
/// A [`Url`] paired with the text it was parsed from.
///
/// `PartialEq` and `Hash` are generated by `derivative` and take only the parsed `url` into
/// account; the `given` text is excluded from both.
#[derive(Debug, Clone, Eq, derivative::Derivative)]
#[derivative(PartialEq, Hash)]
pub struct VerbatimUrl {
    /// The URL in parsed form.
    url: Url,
    /// The text exactly as the user supplied it, or `None` when this value was built from an
    /// already-parsed [`Url`].
    #[derivative(PartialEq = "ignore", Hash = "ignore")]
    given: Option<String>,
}
impl VerbatimUrl {
/// Parse a URL from a string.
pub fn parse(given: String) -> Result<Self, Error> {
let url = Url::parse(&given)?;
Ok(Self {
given: Some(given),
url,
})
}
/// Return the underlying [`Url`].
pub fn raw(&self) -> &Url {
&self.url
}
/// Convert a [`VerbatimUrl`] into a [`Url`].
pub fn to_url(&self) -> Url {
self.url.clone()
}
/// Create a [`VerbatimUrl`] from a [`Url`].
///
/// This method should be used sparingly (ideally, not at all), as it represents a loss of the
/// verbatim representation.
pub fn unknown(url: Url) -> Self {
Self { given: None, url }
}
}
impl std::str::FromStr for VerbatimUrl {
    type Err = Error;

    /// Parse a borrowed string by copying it into an owned `String` and delegating to
    /// [`VerbatimUrl::parse`].
    fn from_str(given: &str) -> Result<Self, Self::Err> {
        Self::parse(String::from(given))
    }
}
impl std::fmt::Display for VerbatimUrl {
    /// Write the URL as the user gave it when that text is available, and otherwise write the
    /// parsed [`Url`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Call `Display::fmt` by its full path: method-call syntax (`x.fmt(f)`) only resolves
        // when a formatting trait has been imported, and becomes ambiguous if both `Display`
        // and `Debug` are in scope.
        match self.given.as_deref() {
            Some(given) => std::fmt::Display::fmt(given, f),
            None => std::fmt::Display::fmt(&self.url, f),
        }
    }
}
impl Deref for VerbatimUrl {
    type Target = Url;

    /// Dereference to the parsed [`Url`], so its methods can be called directly on a
    /// [`VerbatimUrl`].
    fn deref(&self) -> &Self::Target {
        self.raw()
    }
}
#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)]
pub enum Error {
#[error(transparent)]
Url(#[from] url::ParseError),
}