diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index 28ee4544c..b59d07268 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -41,7 +41,7 @@ use url::Url; use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename}; use pep440_rs::Version; -use pep508_rs::{Scheme, VerbatimUrl}; +use pep508_rs::{Pep508Url, Scheme, VerbatimUrl}; use uv_normalize::PackageName; pub use crate::any::*; @@ -81,11 +81,11 @@ mod specified_requirement; mod traits; #[derive(Debug, Clone)] -pub enum VersionOrUrlRef<'a> { +pub enum VersionOrUrlRef<'a, T: Pep508Url = VerbatimUrl> { /// A PEP 440 version specifier, used to identify a distribution in a registry. Version(&'a Version), /// A URL, used to identify a distribution at an arbitrary location. - Url(&'a VerbatimUrl), + Url(&'a T), } impl Verbatim for VersionOrUrlRef<'_> { diff --git a/crates/pep508-rs/src/cursor.rs b/crates/pep508-rs/src/cursor.rs index 64014aeea..049e0efd1 100644 --- a/crates/pep508-rs/src/cursor.rs +++ b/crates/pep508-rs/src/cursor.rs @@ -1,10 +1,11 @@ -use crate::{Pep508Error, Pep508ErrorSource}; use std::fmt::{Display, Formatter}; use std::str::Chars; +use crate::{Pep508Error, Pep508ErrorSource, Pep508Url}; + /// A [`Cursor`] over a string. #[derive(Debug, Clone)] -pub struct Cursor<'a> { +pub(crate) struct Cursor<'a> { input: &'a str, chars: Chars<'a>, pos: usize, @@ -12,7 +13,7 @@ pub struct Cursor<'a> { impl<'a> Cursor<'a> { /// Convert from `&str`. - pub fn new(input: &'a str) -> Self { + pub(crate) fn new(input: &'a str) -> Self { Self { input, chars: input.chars(), @@ -21,7 +22,7 @@ impl<'a> Cursor<'a> { } /// Returns a new cursor starting at the given position. - pub fn at(self, pos: usize) -> Self { + pub(crate) fn at(self, pos: usize) -> Self { Self { input: self.input, chars: self.input[pos..].chars(), @@ -107,11 +108,11 @@ impl<'a> Cursor<'a> { } /// Consumes characters from the cursor, raising an error if it doesn't match the given token. - pub(crate) fn next_expect_char( + pub(crate) fn next_expect_char( &mut self, expected: char, span_start: usize, - ) -> Result<(), Pep508Error> { + ) -> Result<(), Pep508Error> { match self.next() { None => Err(Pep508Error { message: Pep508ErrorSource::String(format!( diff --git a/crates/pep508-rs/src/lib.rs b/crates/pep508-rs/src/lib.rs index 89d63d7a6..335e81737 100644 --- a/crates/pep508-rs/src/lib.rs +++ b/crates/pep508-rs/src/lib.rs @@ -16,18 +16,19 @@ #![warn(missing_docs)] +use cursor::Cursor; #[cfg(feature = "pyo3")] use std::collections::hash_map::DefaultHasher; use std::collections::HashSet; -use std::fmt::{Display, Formatter}; +use std::error::Error; +use std::fmt::{Debug, Display, Formatter}; #[cfg(feature = "pyo3")] use std::hash::{Hash, Hasher}; +#[cfg(feature = "pyo3")] +use std::ops::Deref; use std::path::Path; use std::str::FromStr; -use cursor::Cursor; -#[cfg(feature = "pyo3")] -use pep440_rs::PyVersion; #[cfg(feature = "pyo3")] use pyo3::{ create_exception, exceptions::PyNotImplementedError, pyclass, pyclass::CompareOp, pymethods, @@ -36,19 +37,22 @@ use pyo3::{ use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; use thiserror::Error; use unicode_width::UnicodeWidthChar; +use url::Url; pub use marker::{ MarkerEnvironment, MarkerExpression, MarkerOperator, MarkerTree, MarkerValue, MarkerValueString, MarkerValueVersion, MarkerWarningKind, StringVersion, }; +#[cfg(feature = "pyo3")] +use pep440_rs::PyVersion; use pep440_rs::{Version, VersionSpecifier, VersionSpecifiers}; -use uv_fs::normalize_url_path; -// Parity with the crates.io version of pep508_rs -use crate::verbatim_url::VerbatimUrlError; #[cfg(feature = "non-pep508-extensions")] pub use unnamed::UnnamedRequirement; +// Parity with the crates.io version of pep508_rs pub use uv_normalize::{ExtraName, InvalidNameError, PackageName}; -pub use verbatim_url::{expand_env_vars, split_scheme, strip_host, Scheme, VerbatimUrl}; +pub use verbatim_url::{ + expand_env_vars, split_scheme, strip_host, Scheme, VerbatimUrl, VerbatimUrlError, +}; mod cursor; mod marker; @@ -58,9 +62,9 @@ mod verbatim_url; /// Error with a span attached. Not that those aren't `String` but `Vec` indices. #[derive(Debug)] -pub struct Pep508Error { +pub struct Pep508Error { /// Either we have an error string from our parser or an upstream error from `url` - pub message: Pep508ErrorSource, + pub message: Pep508ErrorSource, /// Span start index pub start: usize, /// Span length @@ -71,19 +75,19 @@ pub struct Pep508Error { /// Either we have an error string from our parser or an upstream error from `url` #[derive(Debug, Error)] -pub enum Pep508ErrorSource { +pub enum Pep508ErrorSource { /// An error from our parser. #[error("{0}")] String(String), /// A URL parsing error. #[error(transparent)] - UrlError(#[from] VerbatimUrlError), + UrlError(T::Err), /// The version requirement is not supported. #[error("{0}")] UnsupportedRequirement(String), } -impl Display for Pep508Error { +impl Display for Pep508Error { /// Pretty formatting with underline. fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { // We can use char indices here since it's a Vec @@ -117,7 +121,7 @@ impl Display for Pep508Error { } /// We need this to allow e.g. anyhow's `.context()` -impl std::error::Error for Pep508Error {} +impl> std::error::Error for Pep508Error {} #[cfg(feature = "pyo3")] create_exception!( @@ -129,8 +133,7 @@ create_exception!( /// A PEP 508 dependency specifier. #[derive(Hash, Debug, Clone, Eq, PartialEq)] -#[cfg_attr(feature = "pyo3", pyclass(module = "pep508"))] -pub struct Requirement { +pub struct Requirement { /// The distribution name such as `numpy` in /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"`. pub name: PackageName, @@ -140,14 +143,14 @@ pub struct Requirement { /// The version specifier such as `>= 2.8.1`, `== 2.8.*` in /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"`. /// or a url - pub version_or_url: Option, + pub version_or_url: Option>, /// The markers such as `python_version > "3.8"` in /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"`. /// Those are a nested and/or tree. pub marker: Option, } -impl Display for Requirement { +impl Display for Requirement { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.name)?; if !self.extras.is_empty() { @@ -182,7 +185,7 @@ impl Display for Requirement { } /// -impl<'de> Deserialize<'de> for Requirement { +impl<'de, T: Pep508Url + Deserialize<'de>> Deserialize<'de> for Requirement { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, @@ -193,7 +196,7 @@ impl<'de> Deserialize<'de> for Requirement { } /// -impl Serialize for Requirement { +impl Serialize for Requirement { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -204,9 +207,24 @@ impl Serialize for Requirement { type MarkerWarning = (MarkerWarningKind, String, String); +#[cfg(feature = "pyo3")] +#[pyclass(module = "pep508", name = "Requirement")] +#[derive(Hash, Debug, Clone, Eq, PartialEq)] +/// A PEP 508 dependency specifier. +pub struct PyRequirement(Requirement); + +#[cfg(feature = "pyo3")] +impl Deref for PyRequirement { + type Target = Requirement; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + #[cfg(feature = "pyo3")] #[pymethods] -impl Requirement { +impl PyRequirement { /// The distribution name such as `numpy` in /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"` #[getter] @@ -231,7 +249,10 @@ impl Requirement { /// Parses a PEP 440 string #[new] pub fn py_new(requirement: &str) -> PyResult { - Self::from_str(requirement).map_err(|err| PyPep508Error::new_err(err.to_string())) + Ok(Self( + Requirement::from_str(requirement) + .map_err(|err| PyPep508Error::new_err(err.to_string()))?, + )) } #[getter] @@ -252,7 +273,7 @@ impl Requirement { } fn __repr__(&self) -> String { - format!(r#""{self}""#) + self.to_string() } fn __richcmp__(&self, other: &Self, op: CompareOp) -> PyResult { @@ -335,7 +356,7 @@ impl Requirement { } } -impl Requirement { +impl Requirement { /// Returns `true` if the [`Version`] satisfies the [`Requirement`]. pub fn is_satisfied_by(&self, version: &Version) -> bool { let Some(version_or_url) = self.version_or_url.as_ref() else { @@ -420,20 +441,61 @@ impl Requirement { } } -impl FromStr for Requirement { - type Err = Pep508Error; +/// Type to parse URLs from `name @ ` into. Defaults to [`url::Url`]. +pub trait Pep508Url: Clone + Display + Debug { + /// String to URL parsing error + type Err: Error + Debug; - /// Parse a [Dependency Specifier](https://packaging.python.org/en/latest/specifications/dependency-specifiers/). - fn from_str(input: &str) -> Result { - parse_pep508_requirement(&mut Cursor::new(input), None) + /// Parse a url from `name @ `. Defaults to [`url::Url::parse_url`]. + fn parse_url(url: &str, working_dir: Option<&Path>) -> Result; +} + +impl Pep508Url for Url { + type Err = url::ParseError; + + fn parse_url(url: &str, _working_dir: Option<&Path>) -> Result { + Url::parse(url) } } -impl Requirement { +impl FromStr for Requirement { + type Err = Pep508Error; + /// Parse a [Dependency Specifier](https://packaging.python.org/en/latest/specifications/dependency-specifiers/). - pub fn parse(input: &str, working_dir: impl AsRef) -> Result { + fn from_str(input: &str) -> Result { + parse_pep508_requirement::(&mut Cursor::new(input), None) + } +} + +impl Requirement { + /// Parse a [Dependency Specifier](https://packaging.python.org/en/latest/specifications/dependency-specifiers/). + pub fn parse(input: &str, working_dir: impl AsRef) -> Result> { parse_pep508_requirement(&mut Cursor::new(input), Some(working_dir.as_ref())) } + + /// Convert a requirement with one URL type into one with another URL type. + /// + /// Example: `Requirement` to `Requirement`. + pub fn convert_url>(self) -> Requirement { + let Requirement { + name, + extras, + version_or_url, + marker, + } = self; + Requirement { + name, + extras, + version_or_url: match version_or_url { + None => None, + Some(VersionOrUrl::VersionSpecifier(specifier)) => { + Some(VersionOrUrl::VersionSpecifier(specifier)) + } + Some(VersionOrUrl::Url(url)) => Some(VersionOrUrl::Url(U::from(url))), + }, + marker, + } + } } /// A list of [`ExtraName`] that can be attached to a [`Requirement`]. @@ -442,8 +504,8 @@ pub struct Extras(Vec); impl Extras { /// Parse a list of extras. - pub fn parse(input: &str) -> Result { - Ok(Self(parse_extras(&mut Cursor::new(input))?)) + pub fn parse(input: &str) -> Result> { + Ok(Self(parse_extras_cursor(&mut Cursor::new(input))?)) } /// Convert the [`Extras`] into a [`Vec`] of [`ExtraName`]. @@ -454,20 +516,20 @@ impl Extras { /// The actual version specifier or URL to install. #[derive(Debug, Clone, Eq, Hash, PartialEq)] -pub enum VersionOrUrl { +pub enum VersionOrUrl { /// A PEP 440 version specifier set VersionSpecifier(VersionSpecifiers), /// A installable URL - Url(VerbatimUrl), + Url(T), } /// Unowned version specifier or URL to install. #[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)] -pub enum VersionOrUrlRef<'a> { +pub enum VersionOrUrlRef<'a, T: Pep508Url = VerbatimUrl> { /// A PEP 440 version specifier set VersionSpecifier(&'a VersionSpecifiers), /// A installable URL - Url(&'a VerbatimUrl), + Url(&'a T), } impl<'a> From<&'a VersionOrUrl> for VersionOrUrlRef<'a> { @@ -481,7 +543,7 @@ impl<'a> From<&'a VersionOrUrl> for VersionOrUrlRef<'a> { } } -fn parse_name(cursor: &mut Cursor) -> Result { +fn parse_name(cursor: &mut Cursor) -> Result> { // https://peps.python.org/pep-0508/#names // ^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$ with re.IGNORECASE let start = cursor.pos(); @@ -578,7 +640,9 @@ fn looks_like_unnamed_requirement(cursor: &mut Cursor) -> bool { } /// parses extras in the `[extra1,extra2] format` -fn parse_extras(cursor: &mut Cursor) -> Result, Pep508Error> { +fn parse_extras_cursor( + cursor: &mut Cursor, +) -> Result, Pep508Error> { let Some(bracket_pos) = cursor.eat_char('[') else { return Ok(vec![]); }; @@ -697,123 +761,10 @@ fn parse_extras(cursor: &mut Cursor) -> Result, Pep508Error> { /// - `file:../editable/` /// - `../editable/` /// - `https://download.pytorch.org/whl/torch_stable.html` -fn parse_url(cursor: &mut Cursor, working_dir: Option<&Path>) -> Result { - // wsp* - cursor.eat_whitespace(); - // - let (start, len) = cursor.take_while(|char| !char.is_whitespace()); - let url = cursor.slice(start, len); - if url.is_empty() { - return Err(Pep508Error { - message: Pep508ErrorSource::String("Expected URL".to_string()), - start, - len, - input: cursor.to_string(), - }); - } - - let url = preprocess_url(url, working_dir, cursor, start, len)?; - - Ok(url) -} - -/// Create a `VerbatimUrl` to represent the requirement. -fn preprocess_url( - url: &str, - #[cfg_attr(not(feature = "non-pep508-extensions"), allow(unused))] working_dir: Option<&Path>, - cursor: &Cursor, - start: usize, - len: usize, -) -> Result { - // Expand environment variables in the URL. - let expanded = expand_env_vars(url); - - if let Some((scheme, path)) = split_scheme(&expanded) { - match Scheme::parse(scheme) { - // Ex) `file:///home/ferris/project/scripts/...`, `file://localhost/home/ferris/project/scripts/...`, or `file:../ferris/` - Some(Scheme::File) => { - // Strip the leading slashes, along with the `localhost` host, if present. - let path = strip_host(path); - - // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. - let path = normalize_url_path(path); - - #[cfg(feature = "non-pep508-extensions")] - if let Some(working_dir) = working_dir { - return Ok(VerbatimUrl::parse_path(path.as_ref(), working_dir) - .with_given(url.to_string())); - } - - Ok(VerbatimUrl::parse_absolute_path(path.as_ref()) - .map_err(|err| Pep508Error { - message: Pep508ErrorSource::UrlError(err), - start, - len, - input: cursor.to_string(), - })? - .with_given(url.to_string())) - } - // Ex) `https://download.pytorch.org/whl/torch_stable.html` - Some(_) => { - // Ex) `https://download.pytorch.org/whl/torch_stable.html` - Ok(VerbatimUrl::parse_url(expanded.as_ref()) - .map_err(|err| Pep508Error { - message: Pep508ErrorSource::UrlError(VerbatimUrlError::Url(err)), - start, - len, - input: cursor.to_string(), - })? - .with_given(url.to_string())) - } - - // Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz` - _ => { - #[cfg(feature = "non-pep508-extensions")] - if let Some(working_dir) = working_dir { - return Ok(VerbatimUrl::parse_path(expanded.as_ref(), working_dir) - .with_given(url.to_string())); - } - - Ok(VerbatimUrl::parse_absolute_path(expanded.as_ref()) - .map_err(|err| Pep508Error { - message: Pep508ErrorSource::UrlError(err), - start, - len, - input: cursor.to_string(), - })? - .with_given(url.to_string())) - } - } - } else { - // Ex) `../editable/` - #[cfg(feature = "non-pep508-extensions")] - if let Some(working_dir) = working_dir { - return Ok( - VerbatimUrl::parse_path(expanded.as_ref(), working_dir).with_given(url.to_string()) - ); - } - - Ok(VerbatimUrl::parse_absolute_path(expanded.as_ref()) - .map_err(|err| Pep508Error { - message: Pep508ErrorSource::UrlError(err), - start, - len, - input: cursor.to_string(), - })? - .with_given(url.to_string())) - } -} - -/// Like [`parse_url`], but allows for extras to be present at the end of the URL, to comply -/// with the non-PEP 508 extensions. -/// -/// For example: -/// - `https://download.pytorch.org/whl/torch_stable.html[dev]` -/// - `../editable[dev]` -fn parse_unnamed_url( +fn parse_url( cursor: &mut Cursor, working_dir: Option<&Path>, -) -> Result<(VerbatimUrl, Vec), Pep508Error> { +) -> Result> { // wsp* cursor.eat_whitespace(); // @@ -828,127 +779,16 @@ fn parse_unnamed_url( }); } - let url = preprocess_unnamed_url(url, working_dir, cursor, start, len)?; + let url = T::parse_url(url, working_dir).map_err(|err| Pep508Error { + message: Pep508ErrorSource::UrlError(err), + start, + len, + input: cursor.to_string(), + })?; Ok(url) } -/// Create a `VerbatimUrl` to represent the requirement, and extracts any extras at the end of the -/// URL, to comply with the non-PEP 508 extensions. -fn preprocess_unnamed_url( - url: &str, - #[cfg_attr(not(feature = "non-pep508-extensions"), allow(unused))] working_dir: Option<&Path>, - cursor: &Cursor, - start: usize, - len: usize, -) -> Result<(VerbatimUrl, Vec), Pep508Error> { - // Split extras _before_ expanding the URL. We assume that the extras are not environment - // variables. If we parsed the extras after expanding the URL, then the verbatim representation - // of the URL itself would be ambiguous, since it would consist of the environment variable, - // which would expand to _more_ than the URL. - let (url, extras) = if let Some((url, extras)) = split_extras(url) { - (url, Some(extras)) - } else { - (url, None) - }; - - // Parse the extras, if provided. - let extras = if let Some(extras) = extras { - parse_extras(&mut Cursor::new(extras)).map_err(|err| Pep508Error { - message: err.message, - start: start + url.len() + err.start, - len: err.len, - input: cursor.to_string(), - })? - } else { - vec![] - }; - - // Expand environment variables in the URL. - let expanded = expand_env_vars(url); - - if let Some((scheme, path)) = split_scheme(&expanded) { - match Scheme::parse(scheme) { - // Ex) `file:///home/ferris/project/scripts/...`, `file://localhost/home/ferris/project/scripts/...`, or `file:../ferris/` - Some(Scheme::File) => { - // Strip the leading slashes, along with the `localhost` host, if present. - let path = strip_host(path); - - // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. - let path = normalize_url_path(path); - - #[cfg(feature = "non-pep508-extensions")] - if let Some(working_dir) = working_dir { - let url = VerbatimUrl::parse_path(path.as_ref(), working_dir) - .with_given(url.to_string()); - return Ok((url, extras)); - } - - let url = VerbatimUrl::parse_absolute_path(path.as_ref()) - .map_err(|err| Pep508Error { - message: Pep508ErrorSource::UrlError(err), - start, - len, - input: cursor.to_string(), - })? - .with_given(url.to_string()); - Ok((url, extras)) - } - // Ex) `https://download.pytorch.org/whl/torch_stable.html` - Some(_) => { - // Ex) `https://download.pytorch.org/whl/torch_stable.html` - let url = VerbatimUrl::parse_url(expanded.as_ref()) - .map_err(|err| Pep508Error { - message: Pep508ErrorSource::UrlError(VerbatimUrlError::Url(err)), - start, - len, - input: cursor.to_string(), - })? - .with_given(url.to_string()); - Ok((url, extras)) - } - - // Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz` - _ => { - #[cfg(feature = "non-pep508-extensions")] - if let Some(working_dir) = working_dir { - let url = VerbatimUrl::parse_path(expanded.as_ref(), working_dir) - .with_given(url.to_string()); - return Ok((url, extras)); - } - - let url = VerbatimUrl::parse_absolute_path(expanded.as_ref()) - .map_err(|err| Pep508Error { - message: Pep508ErrorSource::UrlError(err), - start, - len, - input: cursor.to_string(), - })? - .with_given(url.to_string()); - Ok((url, extras)) - } - } - } else { - // Ex) `../editable/` - #[cfg(feature = "non-pep508-extensions")] - if let Some(working_dir) = working_dir { - let url = - VerbatimUrl::parse_path(expanded.as_ref(), working_dir).with_given(url.to_string()); - return Ok((url, extras)); - } - - let url = VerbatimUrl::parse_absolute_path(expanded.as_ref()) - .map_err(|err| Pep508Error { - message: Pep508ErrorSource::UrlError(err), - start, - len, - input: cursor.to_string(), - })? - .with_given(url.to_string()); - Ok((url, extras)) - } -} - /// Identify the extras in a relative URL (e.g., `../editable[dev]`). /// /// Pip uses `m = re.match(r'^(.+)(\[[^]]+])$', path)`. Our strategy is: @@ -972,12 +812,12 @@ pub fn split_extras(given: &str) -> Option<(&str, &str)> { } /// PEP 440 wrapper -fn parse_specifier( +fn parse_specifier( cursor: &mut Cursor, buffer: &str, start: usize, end: usize, -) -> Result { +) -> Result> { VersionSpecifier::from_str(buffer).map_err(|err| Pep508Error { message: Pep508ErrorSource::String(err.to_string()), start, @@ -991,7 +831,9 @@ fn parse_specifier( /// ```text /// version_one (wsp* ',' version_one)* /// ``` -fn parse_version_specifier(cursor: &mut Cursor) -> Result, Pep508Error> { +fn parse_version_specifier( + cursor: &mut Cursor, +) -> Result>, Pep508Error> { let mut start = cursor.pos(); let mut specifiers = Vec::new(); let mut buffer = String::new(); @@ -1026,9 +868,9 @@ fn parse_version_specifier(cursor: &mut Cursor) -> Result, /// ```text /// '(' version_one (wsp* ',' version_one)* ')' /// ``` -fn parse_version_specifier_parentheses( +fn parse_version_specifier_parentheses( cursor: &mut Cursor, -) -> Result, Pep508Error> { +) -> Result>, Pep508Error> { let brace_pos = cursor.pos(); cursor.next(); // Makes for slightly better error underline @@ -1063,10 +905,10 @@ fn parse_version_specifier_parentheses( } /// Parse a PEP 508-compliant [dependency specifier](https://packaging.python.org/en/latest/specifications/dependency-specifiers). -fn parse_pep508_requirement( +fn parse_pep508_requirement( cursor: &mut Cursor, working_dir: Option<&Path>, -) -> Result { +) -> Result, Pep508Error> { let start = cursor.pos(); // Technically, the grammar is: @@ -1088,7 +930,7 @@ fn parse_pep508_requirement( // wsp* cursor.eat_whitespace(); // extras? - let extras = parse_extras(cursor)?; + let extras = parse_extras_cursor(cursor)?; // wsp* cursor.eat_whitespace(); @@ -1110,7 +952,7 @@ fn parse_pep508_requirement( // a package name. pip supports this in `requirements.txt`, but it doesn't adhere to // the PEP 508 grammar. let mut clone = cursor.clone().at(start); - return if parse_url(&mut clone, working_dir).is_ok() { + return if parse_url::(&mut clone, working_dir).is_ok() { Err(Pep508Error { message: Pep508ErrorSource::UnsupportedRequirement("URL requirement must be preceded by a package name. Add the name of the package before the URL (e.g., `package_name @ https://...`).".to_string()), start, @@ -1138,7 +980,7 @@ fn parse_pep508_requirement( let marker = if cursor.peek_char() == Some(';') { // Skip past the semicolon cursor.next(); - Some(marker::parse_markers_impl(cursor)?) + Some(marker::parse_markers_cursor(cursor)?) } else { None }; @@ -1146,68 +988,7 @@ fn parse_pep508_requirement( cursor.eat_whitespace(); if let Some((pos, char)) = cursor.next() { if let Some(VersionOrUrl::Url(url)) = requirement_kind { - if let Some(given) = url.given() { - if given.ends_with(';') && marker.is_none() { - return Err(Pep508Error { - message: Pep508ErrorSource::String( - "Missing space before ';', the end of the URL is ambiguous".to_string(), - ), - start: requirement_end - ';'.len_utf8(), - len: ';'.len_utf8(), - input: cursor.to_string(), - }); - } - } - } - let message = if marker.is_none() { - format!(r#"Expected end of input or ';', found '{char}'"#) - } else { - format!(r#"Expected end of input, found '{char}'"#) - }; - return Err(Pep508Error { - message: Pep508ErrorSource::String(message), - start: pos, - len: char.len_utf8(), - input: cursor.to_string(), - }); - } - - Ok(Requirement { - name, - extras, - version_or_url: requirement_kind, - marker, - }) -} - -/// Parse a PEP 508-like direct URL specifier without a package name. -/// -/// Unlike pip, we allow extras on URLs and paths. -fn parse_unnamed_requirement( - cursor: &mut Cursor, - working_dir: Option<&Path>, -) -> Result { - cursor.eat_whitespace(); - - // Parse the URL itself, along with any extras. - let (url, extras) = parse_unnamed_url(cursor, working_dir)?; - let requirement_end = cursor.pos(); - - // wsp* - cursor.eat_whitespace(); - // quoted_marker? - let marker = if cursor.peek_char() == Some(';') { - // Skip past the semicolon - cursor.next(); - Some(marker::parse_markers_impl(cursor)?) - } else { - None - }; - // wsp* - cursor.eat_whitespace(); - if let Some((pos, char)) = cursor.next() { - if let Some(given) = url.given() { - if given.ends_with(';') && marker.is_none() { + if marker.is_none() && url.to_string().ends_with(';') { return Err(Pep508Error { message: Pep508ErrorSource::String( "Missing space before ';', the end of the URL is ambiguous".to_string(), @@ -1231,9 +1012,10 @@ fn parse_unnamed_requirement( }); } - Ok(UnnamedRequirement { - url, + Ok(Requirement { + name, extras, + version_or_url: requirement_kind, marker, }) } @@ -1258,7 +1040,7 @@ pub fn python_module(py: Python<'_>, m: &pyo3::Bound<'_, PyModule>) -> PyResult< m.add_class::()?; m.add_class::()?; - m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add("Pep508Error", py.get_type_bound::())?; Ok(()) @@ -1271,24 +1053,29 @@ mod tests { use std::str::FromStr; use insta::assert_snapshot; + use url::Url; use pep440_rs::{Operator, Version, VersionPattern, VersionSpecifier}; use uv_normalize::{ExtraName, InvalidNameError, PackageName}; use crate::cursor::Cursor; use crate::marker::{ - parse_markers_impl, MarkerExpression, MarkerOperator, MarkerTree, MarkerValue, + parse_markers_cursor, MarkerExpression, MarkerOperator, MarkerTree, MarkerValue, MarkerValueString, MarkerValueVersion, }; - use crate::unnamed::UnnamedRequirement; - use crate::{Pep508Error, Requirement, VerbatimUrl, VersionOrUrl}; + use crate::{Requirement, VerbatimUrl, VersionOrUrl}; - fn parse_pepe508_err(input: &str) -> String { - Requirement::from_str(input).unwrap_err().to_string() + fn parse_pep508_err(input: &str) -> String { + Requirement::::from_str(input) + .unwrap_err() + .to_string() } + #[cfg(feature = "non-pep508-extensions")] fn parse_unnamed_err(input: &str) -> String { - UnnamedRequirement::from_str(input).unwrap_err().to_string() + crate::UnnamedRequirement::from_str(input) + .unwrap_err() + .to_string() } #[cfg(windows)] @@ -1296,12 +1083,9 @@ mod tests { fn test_preprocess_url_windows() { use std::path::PathBuf; - let actual = crate::preprocess_url( - "file:///C:/Users/ferris/wheel-0.42.0.tar.gz", + let actual = crate::parse_url::( + &mut Cursor::new("file:///C:/Users/ferris/wheel-0.42.0.tar.gz"), None, - &Cursor::new(""), - 0, - 0, ) .unwrap() .to_file_path(); @@ -1312,7 +1096,7 @@ mod tests { #[test] fn error_empty() { assert_snapshot!( - parse_pepe508_err(""), + parse_pep508_err(""), @r" Empty field is not allowed for PEP508 @@ -1323,7 +1107,7 @@ mod tests { #[test] fn error_start() { assert_snapshot!( - parse_pepe508_err("_name"), + parse_pep508_err("_name"), @" Expected package name starting with an alphanumeric character, found '_' _name @@ -1334,7 +1118,7 @@ mod tests { #[test] fn error_end() { assert_snapshot!( - parse_pepe508_err("name_"), + parse_pep508_err("name_"), @" Package name must end with an alphanumeric character, not '_' name_ @@ -1345,7 +1129,7 @@ mod tests { #[test] fn basic_examples() { let input = r"requests[security,tests]>=2.8.1,==2.8.* ; python_version < '2.7'"; - let requests = Requirement::from_str(input).unwrap(); + let requests = Requirement::::from_str(input).unwrap(); assert_eq!(input, requests.to_string()); let expected = Requirement { name: PackageName::from_str("requests").unwrap(), @@ -1380,40 +1164,41 @@ mod tests { #[test] fn parenthesized_single() { - let numpy = Requirement::from_str("numpy ( >=1.19 )").unwrap(); + let numpy = Requirement::::from_str("numpy ( >=1.19 )").unwrap(); assert_eq!(numpy.name.as_ref(), "numpy"); } #[test] fn parenthesized_double() { - let numpy = Requirement::from_str("numpy ( >=1.19, <2.0 )").unwrap(); + let numpy = Requirement::::from_str("numpy ( >=1.19, <2.0 )").unwrap(); assert_eq!(numpy.name.as_ref(), "numpy"); } #[test] fn versions_single() { - let numpy = Requirement::from_str("numpy >=1.19 ").unwrap(); + let numpy = Requirement::::from_str("numpy >=1.19 ").unwrap(); assert_eq!(numpy.name.as_ref(), "numpy"); } #[test] fn versions_double() { - let numpy = Requirement::from_str("numpy >=1.19, <2.0 ").unwrap(); + let numpy = Requirement::::from_str("numpy >=1.19, <2.0 ").unwrap(); assert_eq!(numpy.name.as_ref(), "numpy"); } #[test] + #[cfg(feature = "non-pep508-extensions")] fn direct_url_no_extras() { - let numpy = UnnamedRequirement::from_str("https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl").unwrap(); + let numpy = crate::UnnamedRequirement::from_str("https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl").unwrap(); assert_eq!(numpy.url.to_string(), "https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl"); assert_eq!(numpy.extras, vec![]); } #[test] - #[cfg(unix)] + #[cfg(all(unix, feature = "non-pep508-extensions"))] fn direct_url_extras() { let numpy = - UnnamedRequirement::from_str("/path/to/numpy-1.26.4-cp312-cp312-win32.whl[dev]") + crate::UnnamedRequirement::from_str("/path/to/numpy-1.26.4-cp312-cp312-win32.whl[dev]") .unwrap(); assert_eq!( numpy.url.to_string(), @@ -1423,11 +1208,12 @@ mod tests { } #[test] - #[cfg(windows)] + #[cfg(all(windows, feature = "non-pep508-extensions"))] fn direct_url_extras() { - let numpy = - UnnamedRequirement::from_str("C:\\path\\to\\numpy-1.26.4-cp312-cp312-win32.whl[dev]") - .unwrap(); + let numpy = crate::UnnamedRequirement::from_str( + "C:\\path\\to\\numpy-1.26.4-cp312-cp312-win32.whl[dev]", + ) + .unwrap(); assert_eq!( numpy.url.to_string(), "file:///C:/path/to/numpy-1.26.4-cp312-cp312-win32.whl" @@ -1438,7 +1224,7 @@ mod tests { #[test] fn error_extras_eof1() { assert_snapshot!( - parse_pepe508_err("black["), + parse_pep508_err("black["), @" Missing closing bracket (expected ']', found end of dependency specification) black[ @@ -1449,7 +1235,7 @@ mod tests { #[test] fn error_extras_eof2() { assert_snapshot!( - parse_pepe508_err("black[d"), + parse_pep508_err("black[d"), @" Missing closing bracket (expected ']', found end of dependency specification) black[d @@ -1460,7 +1246,7 @@ mod tests { #[test] fn error_extras_eof3() { assert_snapshot!( - parse_pepe508_err("black[d,"), + parse_pep508_err("black[d,"), @" Missing closing bracket (expected ']', found end of dependency specification) black[d, @@ -1471,7 +1257,7 @@ mod tests { #[test] fn error_extras_illegal_start1() { assert_snapshot!( - parse_pepe508_err("black[ö]"), + parse_pep508_err("black[ö]"), @" Expected an alphanumeric character starting the extra name, found 'ö' black[ö] @@ -1482,7 +1268,7 @@ mod tests { #[test] fn error_extras_illegal_start2() { assert_snapshot!( - parse_pepe508_err("black[_d]"), + parse_pep508_err("black[_d]"), @" Expected an alphanumeric character starting the extra name, found '_' black[_d] @@ -1493,7 +1279,7 @@ mod tests { #[test] fn error_extras_illegal_start3() { assert_snapshot!( - parse_pepe508_err("black[,]"), + parse_pep508_err("black[,]"), @" Expected either alphanumerical character (starting the extra name) or ']' (ending the extras section), found ',' black[,] @@ -1504,7 +1290,7 @@ mod tests { #[test] fn error_extras_illegal_character() { assert_snapshot!( - parse_pepe508_err("black[jüpyter]"), + parse_pep508_err("black[jüpyter]"), @" Invalid character in extras name, expected an alphanumeric character, '-', '_', '.', ',' or ']', found 'ü' black[jüpyter] @@ -1514,13 +1300,13 @@ mod tests { #[test] fn error_extras1() { - let numpy = Requirement::from_str("black[d]").unwrap(); + let numpy = Requirement::::from_str("black[d]").unwrap(); assert_eq!(numpy.extras, vec![ExtraName::from_str("d").unwrap()]); } #[test] fn error_extras2() { - let numpy = Requirement::from_str("black[d,jupyter]").unwrap(); + let numpy = Requirement::::from_str("black[d,jupyter]").unwrap(); assert_eq!( numpy.extras, vec![ @@ -1532,20 +1318,20 @@ mod tests { #[test] fn empty_extras() { - let black = Requirement::from_str("black[]").unwrap(); + let black = Requirement::::from_str("black[]").unwrap(); assert_eq!(black.extras, vec![]); } #[test] fn empty_extras_with_spaces() { - let black = Requirement::from_str("black[ ]").unwrap(); + let black = Requirement::::from_str("black[ ]").unwrap(); assert_eq!(black.extras, vec![]); } #[test] fn error_extra_with_trailing_comma() { assert_snapshot!( - parse_pepe508_err("black[d,]"), + parse_pep508_err("black[d,]"), @" Expected an alphanumeric character starting the extra name, found ']' black[d,] @@ -1556,7 +1342,7 @@ mod tests { #[test] fn error_parenthesized_pep440() { assert_snapshot!( - parse_pepe508_err("numpy ( ><1.19 )"), + parse_pep508_err("numpy ( ><1.19 )"), @" no such comparison operator \"><\", must be one of ~= == != <= >= < > === numpy ( ><1.19 ) @@ -1567,7 +1353,7 @@ mod tests { #[test] fn error_parenthesized_parenthesis() { assert_snapshot!( - parse_pepe508_err("numpy ( >=1.19"), + parse_pep508_err("numpy ( >=1.19"), @" Missing closing parenthesis (expected ')', found end of dependency specification) numpy ( >=1.19 @@ -1578,7 +1364,7 @@ mod tests { #[test] fn error_whats_that() { assert_snapshot!( - parse_pepe508_err("numpy % 1.16"), + parse_pep508_err("numpy % 1.16"), @" Expected one of `@`, `(`, `<`, `=`, `>`, `~`, `!`, `;`, found `%` numpy % 1.16 @@ -1596,7 +1382,7 @@ mod tests { name: PackageName::from_str("pip").unwrap(), extras: vec![], marker: None, - version_or_url: Some(VersionOrUrl::Url(VerbatimUrl::from_str(url).unwrap())), + version_or_url: Some(VersionOrUrl::Url(Url::parse(url).unwrap())), }; assert_eq!(pip_url, expected); } @@ -1604,7 +1390,7 @@ mod tests { #[test] fn test_marker_parsing() { let marker = r#"python_version == "2.7" and (sys_platform == "win32" or (os_name == "linux" and implementation_name == 'cpython'))"#; - let actual = parse_markers_impl(&mut Cursor::new(marker)).unwrap(); + let actual = parse_markers_cursor::(&mut Cursor::new(marker)).unwrap(); let expected = MarkerTree::And(vec![ MarkerTree::Expression(MarkerExpression { l_value: MarkerValue::MarkerEnvVersion(MarkerValueVersion::PythonVersion), @@ -1638,13 +1424,13 @@ mod tests { #[test] fn name_and_marker() { - Requirement::from_str(r#"numpy; sys_platform == "win32" or (os_name == "linux" and implementation_name == 'cpython')"#).unwrap(); + Requirement::::from_str(r#"numpy; sys_platform == "win32" or (os_name == "linux" and implementation_name == 'cpython')"#).unwrap(); } #[test] fn error_marker_incomplete1() { assert_snapshot!( - parse_pepe508_err(r"numpy; sys_platform"), + parse_pep508_err(r"numpy; sys_platform"), @" Expected a valid marker operator (such as '>=' or 'not in'), found '' numpy; sys_platform @@ -1655,7 +1441,7 @@ mod tests { #[test] fn error_marker_incomplete2() { assert_snapshot!( - parse_pepe508_err(r"numpy; sys_platform =="), + parse_pep508_err(r"numpy; sys_platform =="), @r" Expected marker value, found end of dependency specification numpy; sys_platform == @@ -1666,7 +1452,7 @@ mod tests { #[test] fn error_marker_incomplete3() { assert_snapshot!( - parse_pepe508_err(r#"numpy; sys_platform == "win32" or"#), + parse_pep508_err(r#"numpy; sys_platform == "win32" or"#), @r#" Expected marker value, found end of dependency specification numpy; sys_platform == "win32" or @@ -1677,7 +1463,7 @@ mod tests { #[test] fn error_marker_incomplete4() { assert_snapshot!( - parse_pepe508_err(r#"numpy; sys_platform == "win32" or (os_name == "linux""#), + parse_pep508_err(r#"numpy; sys_platform == "win32" or (os_name == "linux""#), @r#" Expected ')', found end of dependency specification numpy; sys_platform == "win32" or (os_name == "linux" @@ -1688,7 +1474,7 @@ mod tests { #[test] fn error_marker_incomplete5() { assert_snapshot!( - parse_pepe508_err(r#"numpy; sys_platform == "win32" or (os_name == "linux" and"#), + parse_pep508_err(r#"numpy; sys_platform == "win32" or (os_name == "linux" and"#), @r#" Expected marker value, found end of dependency specification numpy; sys_platform == "win32" or (os_name == "linux" and @@ -1699,7 +1485,7 @@ mod tests { #[test] fn error_pep440() { assert_snapshot!( - parse_pepe508_err(r"numpy >=1.1.*"), + parse_pep508_err(r"numpy >=1.1.*"), @r" Operator >= cannot be used with a wildcard version specifier numpy >=1.1.* @@ -1710,7 +1496,7 @@ mod tests { #[test] fn error_no_name() { assert_snapshot!( - parse_pepe508_err(r"==0.0"), + parse_pep508_err(r"==0.0"), @r" Expected package name starting with an alphanumeric character, found '=' ==0.0 @@ -1722,7 +1508,7 @@ mod tests { #[test] fn error_unnamedunnamed_url() { assert_snapshot!( - parse_pepe508_err(r"git+https://github.com/pallets/flask.git"), + parse_pep508_err(r"git+https://github.com/pallets/flask.git"), @" URL requirement must be preceded by a package name. Add the name of the package before the URL (e.g., `package_name @ https://...`). git+https://github.com/pallets/flask.git @@ -1733,7 +1519,7 @@ mod tests { #[test] fn error_unnamed_file_path() { assert_snapshot!( - parse_pepe508_err(r"/path/to/flask.tar.gz"), + parse_pep508_err(r"/path/to/flask.tar.gz"), @r###" URL requirement must be preceded by a package name. Add the name of the package before the URL (e.g., `package_name @ /path/to/file`). /path/to/flask.tar.gz @@ -1745,7 +1531,7 @@ mod tests { #[test] fn error_no_comma_between_extras() { assert_snapshot!( - parse_pepe508_err(r"name[bar baz]"), + parse_pep508_err(r"name[bar baz]"), @" Expected either ',' (separating extras) or ']' (ending the extras section), found 'b' name[bar baz] @@ -1756,7 +1542,7 @@ mod tests { #[test] fn error_extra_comma_after_extras() { assert_snapshot!( - parse_pepe508_err(r"name[bar, baz,]"), + parse_pep508_err(r"name[bar, baz,]"), @" Expected an alphanumeric character starting the extra name, found ']' name[bar, baz,] @@ -1767,7 +1553,7 @@ mod tests { #[test] fn error_extras_not_closed() { assert_snapshot!( - parse_pepe508_err(r"name[bar, baz >= 1.0"), + parse_pep508_err(r"name[bar, baz >= 1.0"), @" Expected either ',' (separating extras) or ']' (ending the extras section), found '>' name[bar, baz >= 1.0 @@ -1778,7 +1564,7 @@ mod tests { #[test] fn error_no_space_after_url() { assert_snapshot!( - parse_pepe508_err(r"name @ https://example.com/; extra == 'example'"), + parse_pep508_err(r"name @ https://example.com/; extra == 'example'"), @" Missing space before ';', the end of the URL is ambiguous name @ https://example.com/; extra == 'example' @@ -1789,7 +1575,7 @@ mod tests { #[test] fn error_name_at_nothing() { assert_snapshot!( - parse_pepe508_err(r"name @"), + parse_pep508_err(r"name @"), @" Expected URL name @ @@ -1800,7 +1586,7 @@ mod tests { #[test] fn test_error_invalid_marker_key() { assert_snapshot!( - parse_pepe508_err(r"name; invalid_name"), + parse_pep508_err(r"name; invalid_name"), @" Expected a valid marker name, found 'invalid_name' name; invalid_name @@ -1811,7 +1597,7 @@ mod tests { #[test] fn error_markers_invalid_order() { assert_snapshot!( - parse_pepe508_err("name; '3.7' <= invalid_name"), + parse_pep508_err("name; '3.7' <= invalid_name"), @" Expected a valid marker name, found 'invalid_name' name; '3.7' <= invalid_name @@ -1822,7 +1608,7 @@ mod tests { #[test] fn error_markers_notin() { assert_snapshot!( - parse_pepe508_err("name; '3.7' notin python_version"), + parse_pep508_err("name; '3.7' notin python_version"), @" Expected a valid marker operator (such as '>=' or 'not in'), found 'notin' name; '3.7' notin python_version @@ -1833,7 +1619,7 @@ mod tests { #[test] fn error_markers_inpython_version() { assert_snapshot!( - parse_pepe508_err("name; '3.6'inpython_version"), + parse_pep508_err("name; '3.6'inpython_version"), @" Expected a valid marker operator (such as '>=' or 'not in'), found 'inpython_version' name; '3.6'inpython_version @@ -1844,7 +1630,7 @@ mod tests { #[test] fn error_markers_not_python_version() { assert_snapshot!( - parse_pepe508_err("name; '3.7' not python_version"), + parse_pep508_err("name; '3.7' not python_version"), @" Expected 'i', found 'p' name; '3.7' not python_version @@ -1855,7 +1641,7 @@ mod tests { #[test] fn error_markers_invalid_operator() { assert_snapshot!( - parse_pepe508_err("name; '3.7' ~ python_version"), + parse_pep508_err("name; '3.7' ~ python_version"), @" Expected a valid marker operator (such as '>=' or 'not in'), found '~' name; '3.7' ~ python_version @@ -1866,7 +1652,7 @@ mod tests { #[test] fn error_invalid_prerelease() { assert_snapshot!( - parse_pepe508_err("name==1.0.org1"), + parse_pep508_err("name==1.0.org1"), @r###" after parsing '1.0', found '.org1', which is not part of a valid version name==1.0.org1 @@ -1878,7 +1664,7 @@ mod tests { #[test] fn error_no_version_value() { assert_snapshot!( - parse_pepe508_err("name=="), + parse_pep508_err("name=="), @" Unexpected end of version specifier, expected version name== @@ -1889,7 +1675,7 @@ mod tests { #[test] fn error_no_version_operator() { assert_snapshot!( - parse_pepe508_err("name 1.0"), + parse_pep508_err("name 1.0"), @" Expected one of `@`, `(`, `<`, `=`, `>`, `~`, `!`, `;`, found `1` name 1.0 @@ -1900,7 +1686,7 @@ mod tests { #[test] fn error_random_char() { assert_snapshot!( - parse_pepe508_err("name >= 1.0 #"), + parse_pep508_err("name >= 1.0 #"), @" Trailing `#` is not allowed name >= 1.0 # @@ -1909,6 +1695,7 @@ mod tests { } #[test] + #[cfg(feature = "non-pep508-extensions")] fn error_invalid_extra_unnamed_url() { assert_snapshot!( parse_unnamed_err("/foo-3.0.0-py3-none-any.whl[d,]"), @@ -1933,23 +1720,23 @@ mod tests { for requirement in requirements { assert_eq!( - Requirement::parse(requirement, &cwd).is_ok(), + Requirement::::parse(requirement, &cwd).is_ok(), cfg!(feature = "non-pep508-extensions"), "{}: {:?}", requirement, - Requirement::parse(requirement, &cwd) + Requirement::::parse(requirement, &cwd) ); } } #[test] fn no_space_after_operator() { - let requirement = Requirement::from_str("pytest;'4.0'>=python_version").unwrap(); + let requirement = Requirement::::from_str("pytest;'4.0'>=python_version").unwrap(); assert_eq!(requirement.to_string(), "pytest ; '4.0' >= python_version"); } #[test] - fn path_with_fragment() -> Result<(), Pep508Error> { + fn path_with_fragment() { let requirements = if cfg!(windows) { &[ "wheel @ file:///C:/Users/ferris/wheel-0.42.0.whl#hash=somehash", @@ -1964,7 +1751,9 @@ mod tests { for requirement in requirements { // Extract the URL. - let Some(VersionOrUrl::Url(url)) = Requirement::from_str(requirement)?.version_or_url + let Some(VersionOrUrl::Url(url)) = Requirement::::from_str(requirement) + .unwrap() + .version_or_url else { unreachable!("Expected a URL") }; @@ -1977,26 +1766,25 @@ mod tests { url.path() ); } - - Ok(()) } #[test] fn add_extra_marker() -> Result<(), InvalidNameError> { - let requirement = Requirement::from_str("pytest").unwrap(); - let expected = Requirement::from_str("pytest; extra == 'dotenv'").unwrap(); + let requirement = Requirement::::from_str("pytest").unwrap(); + let expected = Requirement::::from_str("pytest; extra == 'dotenv'").unwrap(); let actual = requirement.with_extra_marker(&ExtraName::from_str("dotenv")?); assert_eq!(actual, expected); - let requirement = Requirement::from_str("pytest; '4.0' >= python_version").unwrap(); + let requirement = Requirement::::from_str("pytest; '4.0' >= python_version").unwrap(); let expected = Requirement::from_str("pytest; '4.0' >= python_version and extra == 'dotenv'").unwrap(); let actual = requirement.with_extra_marker(&ExtraName::from_str("dotenv")?); assert_eq!(actual, expected); - let requirement = - Requirement::from_str("pytest; '4.0' >= python_version or sys_platform == 'win32'") - .unwrap(); + let requirement = Requirement::::from_str( + "pytest; '4.0' >= python_version or sys_platform == 'win32'", + ) + .unwrap(); let expected = Requirement::from_str( "pytest; ('4.0' >= python_version or sys_platform == 'win32') and extra == 'dotenv'", ) diff --git a/crates/pep508-rs/src/marker.rs b/crates/pep508-rs/src/marker.rs index 5112b3281..bd9ac5905 100644 --- a/crates/pep508-rs/src/marker.rs +++ b/crates/pep508-rs/src/marker.rs @@ -9,21 +9,24 @@ //! outcomes. This implementation tries to carefully validate everything and emit warnings whenever //! bogus comparisons with unintended semantics are made. -use crate::cursor::Cursor; -use crate::{Pep508Error, Pep508ErrorSource}; -use pep440_rs::{Version, VersionPattern, VersionSpecifier}; +use std::collections::HashSet; +use std::fmt::{Display, Formatter}; +use std::ops::Deref; +use std::str::FromStr; + #[cfg(feature = "pyo3")] use pyo3::{ basic::CompareOp, exceptions::PyValueError, pyclass, pymethods, types::PyAnyMethods, PyResult, Python, }; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; -use std::collections::HashSet; -use std::fmt::{Display, Formatter}; -use std::ops::Deref; -use std::str::FromStr; + +use pep440_rs::{Version, VersionPattern, VersionSpecifier}; use uv_normalize::ExtraName; +use crate::cursor::Cursor; +use crate::{Pep508Error, Pep508ErrorSource, Pep508Url}; + /// Ways in which marker evaluation can fail #[derive(Debug, Eq, Hash, Ord, PartialOrd, PartialEq, Clone, Copy)] #[cfg_attr(feature = "pyo3", pyclass(module = "pep508"))] @@ -1231,7 +1234,9 @@ impl Display for MarkerTree { /// marker_op = version_cmp | (wsp* 'in') | (wsp* 'not' wsp+ 'in') /// ``` /// The `wsp*` has already been consumed by the caller. -fn parse_marker_operator(cursor: &mut Cursor) -> Result { +fn parse_marker_operator( + cursor: &mut Cursor, +) -> Result> { let (start, len) = if cursor.peek_char().is_some_and(|c| c.is_alphabetic()) { // "in" or "not" cursor.take_while(|char| !char.is_whitespace() && char != '\'' && char != '"') @@ -1284,7 +1289,7 @@ fn parse_marker_operator(cursor: &mut Cursor) -> Result Result { +fn parse_marker_value(cursor: &mut Cursor) -> Result> { // > User supplied constants are always encoded as strings with either ' or " quote marks. Note // > that backslash escapes are not defined, but existing implementations do support them. They // > are not included in this specification because they add complexity and there is no observable @@ -1328,7 +1333,9 @@ fn parse_marker_value(cursor: &mut Cursor) -> Result { /// ```text /// marker_var:l marker_op:o marker_var:r /// ``` -fn parse_marker_key_op_value(cursor: &mut Cursor) -> Result { +fn parse_marker_key_op_value( + cursor: &mut Cursor, +) -> Result> { cursor.eat_whitespace(); let lvalue = parse_marker_value(cursor)?; cursor.eat_whitespace(); @@ -1349,7 +1356,7 @@ fn parse_marker_key_op_value(cursor: &mut Cursor) -> Result (o, l, r) /// | wsp* '(' marker:m wsp* ')' -> m /// ``` -fn parse_marker_expr(cursor: &mut Cursor) -> Result { +fn parse_marker_expr(cursor: &mut Cursor) -> Result> { cursor.eat_whitespace(); if let Some(start_pos) = cursor.eat_char('(') { let marker = parse_marker_or(cursor)?; @@ -1364,7 +1371,7 @@ fn parse_marker_expr(cursor: &mut Cursor) -> Result { /// marker_and = marker_expr:l wsp* 'and' marker_expr:r -> ('and', l, r) /// | marker_expr:m -> m /// ``` -fn parse_marker_and(cursor: &mut Cursor) -> Result { +fn parse_marker_and(cursor: &mut Cursor) -> Result> { parse_marker_op(cursor, "and", MarkerTree::And, parse_marker_expr) } @@ -1372,17 +1379,17 @@ fn parse_marker_and(cursor: &mut Cursor) -> Result { /// marker_or = marker_and:l wsp* 'or' marker_and:r -> ('or', l, r) /// | marker_and:m -> m /// ``` -fn parse_marker_or(cursor: &mut Cursor) -> Result { +fn parse_marker_or(cursor: &mut Cursor) -> Result> { parse_marker_op(cursor, "or", MarkerTree::Or, parse_marker_and) } /// Parses both `marker_and` and `marker_or` -fn parse_marker_op( +fn parse_marker_op( cursor: &mut Cursor, op: &str, op_constructor: fn(Vec) -> MarkerTree, - parse_inner: fn(&mut Cursor) -> Result, -) -> Result { + parse_inner: fn(&mut Cursor) -> Result>, +) -> Result> { // marker_and or marker_expr let first_element = parse_inner(cursor)?; // wsp* @@ -1420,7 +1427,9 @@ fn parse_marker_op( /// ```text /// marker = marker_or^ /// ``` -pub(crate) fn parse_markers_impl(cursor: &mut Cursor) -> Result { +pub(crate) fn parse_markers_cursor( + cursor: &mut Cursor, +) -> Result> { let marker = parse_marker_or(cursor)?; cursor.eat_whitespace(); if let Some((pos, unexpected)) = cursor.next() { @@ -1440,21 +1449,24 @@ pub(crate) fn parse_markers_impl(cursor: &mut Cursor) -> Result Result { +fn parse_markers(markers: &str) -> Result> { let mut chars = Cursor::new(markers); - parse_markers_impl(&mut chars) + parse_markers_cursor(&mut chars) } #[cfg(test)] mod test { + use std::str::FromStr; + + use insta::assert_snapshot; + + use uv_normalize::ExtraName; + use crate::marker::{MarkerEnvironment, StringVersion}; use crate::{ MarkerExpression, MarkerOperator, MarkerTree, MarkerValue, MarkerValueString, MarkerValueVersion, }; - use insta::assert_snapshot; - use std::str::FromStr; - use uv_normalize::ExtraName; fn parse_err(input: &str) -> String { MarkerTree::from_str(input).unwrap_err().to_string() diff --git a/crates/pep508-rs/src/unnamed.rs b/crates/pep508-rs/src/unnamed.rs index 2020aba4d..9b9f101f2 100644 --- a/crates/pep508-rs/src/unnamed.rs +++ b/crates/pep508-rs/src/unnamed.rs @@ -2,14 +2,17 @@ use std::fmt::{Display, Formatter}; use std::path::Path; use std::str::FromStr; -#[cfg(feature = "pyo3")] -use pyo3::pyclass; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; +use uv_fs::normalize_url_path; use uv_normalize::ExtraName; -use crate::cursor::Cursor; -use crate::{MarkerEnvironment, MarkerTree, Pep508Error, VerbatimUrl}; +use crate::marker::parse_markers_cursor; +use crate::{ + expand_env_vars, parse_extras_cursor, split_extras, split_scheme, strip_host, Cursor, + MarkerEnvironment, MarkerTree, Pep508Error, Pep508ErrorSource, Scheme, VerbatimUrl, + VerbatimUrlError, +}; /// A PEP 508-like, direct URL dependency specifier without a package name. /// @@ -17,7 +20,6 @@ use crate::{MarkerEnvironment, MarkerTree, Pep508Error, VerbatimUrl}; /// dependencies. This isn't compliant with PEP 508, but is common in `requirements.txt`, which /// is implementation-defined. #[derive(Hash, Debug, Clone, Eq, PartialEq)] -#[cfg_attr(feature = "pyo3", pyclass(module = "pep508"))] pub struct UnnamedRequirement { /// The direct URL that defines the version specifier. pub url: VerbatimUrl, @@ -84,17 +86,223 @@ impl Serialize for UnnamedRequirement { } impl FromStr for UnnamedRequirement { - type Err = Pep508Error; + type Err = Pep508Error; /// Parse a PEP 508-like direct URL requirement without a package name. fn from_str(input: &str) -> Result { - crate::parse_unnamed_requirement(&mut Cursor::new(input), None) + parse_unnamed_requirement(&mut Cursor::new(input), None) } } impl UnnamedRequirement { /// Parse a PEP 508-like direct URL requirement without a package name. - pub fn parse(input: &str, working_dir: impl AsRef) -> Result { - crate::parse_unnamed_requirement(&mut Cursor::new(input), Some(working_dir.as_ref())) + pub fn parse( + input: &str, + working_dir: impl AsRef, + ) -> Result> { + parse_unnamed_requirement(&mut Cursor::new(input), Some(working_dir.as_ref())) } } + +/// Parse a PEP 508-like direct URL specifier without a package name. +/// +/// Unlike pip, we allow extras on URLs and paths. +fn parse_unnamed_requirement( + cursor: &mut Cursor, + working_dir: Option<&Path>, +) -> Result> { + cursor.eat_whitespace(); + + // Parse the URL itself, along with any extras. + let (url, extras) = parse_unnamed_url(cursor, working_dir)?; + let requirement_end = cursor.pos(); + + // wsp* + cursor.eat_whitespace(); + // quoted_marker? + let marker = if cursor.peek_char() == Some(';') { + // Skip past the semicolon + cursor.next(); + Some(parse_markers_cursor(cursor)?) + } else { + None + }; + // wsp* + cursor.eat_whitespace(); + if let Some((pos, char)) = cursor.next() { + if let Some(given) = url.given() { + if given.ends_with(';') && marker.is_none() { + return Err(Pep508Error { + message: Pep508ErrorSource::String( + "Missing space before ';', the end of the URL is ambiguous".to_string(), + ), + start: requirement_end - ';'.len_utf8(), + len: ';'.len_utf8(), + input: cursor.to_string(), + }); + } + } + let message = if marker.is_none() { + format!(r#"Expected end of input or ';', found '{char}'"#) + } else { + format!(r#"Expected end of input, found '{char}'"#) + }; + return Err(Pep508Error { + message: Pep508ErrorSource::String(message), + start: pos, + len: char.len_utf8(), + input: cursor.to_string(), + }); + } + + Ok(UnnamedRequirement { + url, + extras, + marker, + }) +} + +/// Create a `VerbatimUrl` to represent the requirement, and extracts any extras at the end of the +/// URL, to comply with the non-PEP 508 extensions. +fn preprocess_unnamed_url( + url: &str, + #[cfg_attr(not(feature = "non-pep508-extensions"), allow(unused))] working_dir: Option<&Path>, + cursor: &Cursor, + start: usize, + len: usize, +) -> Result<(VerbatimUrl, Vec), Pep508Error> { + // Split extras _before_ expanding the URL. We assume that the extras are not environment + // variables. If we parsed the extras after expanding the URL, then the verbatim representation + // of the URL itself would be ambiguous, since it would consist of the environment variable, + // which would expand to _more_ than the URL. + let (url, extras) = if let Some((url, extras)) = split_extras(url) { + (url, Some(extras)) + } else { + (url, None) + }; + + // Parse the extras, if provided. + let extras = if let Some(extras) = extras { + parse_extras_cursor(&mut Cursor::new(extras)).map_err(|err| Pep508Error { + message: err.message, + start: start + url.len() + err.start, + len: err.len, + input: cursor.to_string(), + })? + } else { + vec![] + }; + + // Expand environment variables in the URL. + let expanded = expand_env_vars(url); + + if let Some((scheme, path)) = split_scheme(&expanded) { + match Scheme::parse(scheme) { + // Ex) `file:///home/ferris/project/scripts/...`, `file://localhost/home/ferris/project/scripts/...`, or `file:../ferris/` + Some(Scheme::File) => { + // Strip the leading slashes, along with the `localhost` host, if present. + let path = strip_host(path); + + // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. + let path = normalize_url_path(path); + + #[cfg(feature = "non-pep508-extensions")] + if let Some(working_dir) = working_dir { + let url = VerbatimUrl::parse_path(path.as_ref(), working_dir) + .with_given(url.to_string()); + return Ok((url, extras)); + } + + let url = VerbatimUrl::parse_absolute_path(path.as_ref()) + .map_err(|err| Pep508Error { + message: Pep508ErrorSource::::UrlError(err), + start, + len, + input: cursor.to_string(), + })? + .with_given(url.to_string()); + Ok((url, extras)) + } + // Ex) `https://download.pytorch.org/whl/torch_stable.html` + Some(_) => { + // Ex) `https://download.pytorch.org/whl/torch_stable.html` + let url = VerbatimUrl::parse_url(expanded.as_ref()) + .map_err(|err| Pep508Error { + message: Pep508ErrorSource::::UrlError(VerbatimUrlError::Url( + err, + )), + start, + len, + input: cursor.to_string(), + })? + .with_given(url.to_string()); + Ok((url, extras)) + } + + // Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz` + _ => { + if let Some(working_dir) = working_dir { + let url = VerbatimUrl::parse_path(expanded.as_ref(), working_dir) + .with_given(url.to_string()); + return Ok((url, extras)); + } + + let url = VerbatimUrl::parse_absolute_path(expanded.as_ref()) + .map_err(|err| Pep508Error { + message: Pep508ErrorSource::UrlError(err), + start, + len, + input: cursor.to_string(), + })? + .with_given(url.to_string()); + Ok((url, extras)) + } + } + } else { + // Ex) `../editable/` + if let Some(working_dir) = working_dir { + let url = + VerbatimUrl::parse_path(expanded.as_ref(), working_dir).with_given(url.to_string()); + return Ok((url, extras)); + } + + let url = VerbatimUrl::parse_absolute_path(expanded.as_ref()) + .map_err(|err| Pep508Error { + message: Pep508ErrorSource::UrlError(err), + start, + len, + input: cursor.to_string(), + })? + .with_given(url.to_string()); + Ok((url, extras)) + } +} + +/// Like [`crate::parse_url`], but allows for extras to be present at the end of the URL, to comply +/// with the non-PEP 508 extensions. +/// +/// For example: +/// - `https://download.pytorch.org/whl/torch_stable.html[dev]` +/// - `../editable[dev]` +fn parse_unnamed_url( + cursor: &mut Cursor, + working_dir: Option<&Path>, +) -> Result<(VerbatimUrl, Vec), Pep508Error> { + // wsp* + cursor.eat_whitespace(); + // + let (start, len) = cursor.take_while(|char| !char.is_whitespace()); + let url = cursor.slice(start, len); + if url.is_empty() { + return Err(Pep508Error { + message: Pep508ErrorSource::String("Expected URL".to_string()), + start, + len, + input: cursor.to_string(), + }); + } + + let url = preprocess_unnamed_url(url, working_dir, cursor, start, len)?; + + Ok(url) +} diff --git a/crates/pep508-rs/src/verbatim_url.rs b/crates/pep508-rs/src/verbatim_url.rs index 33b573dad..af8b9df76 100644 --- a/crates/pep508-rs/src/verbatim_url.rs +++ b/crates/pep508-rs/src/verbatim_url.rs @@ -5,9 +5,12 @@ use std::path::{Path, PathBuf}; use once_cell::sync::Lazy; use regex::Regex; +use thiserror::Error; use url::{ParseError, Url}; -use uv_fs::normalize_path; +use uv_fs::{normalize_path, normalize_url_path}; + +use crate::Pep508Url; /// A wrapper around [`Url`] that preserves the original string. #[derive(Debug, Clone, Eq, derivative::Derivative, serde::Deserialize, serde::Serialize)] @@ -182,8 +185,78 @@ impl Deref for VerbatimUrl { } } +impl From for VerbatimUrl { + fn from(url: Url) -> Self { + VerbatimUrl::from_url(url) + } +} + +impl Pep508Url for VerbatimUrl { + type Err = VerbatimUrlError; + + /// Create a `VerbatimUrl` to represent the requirement. + fn parse_url( + url: &str, + #[cfg_attr(not(feature = "non-pep508-extensions"), allow(unused_variables))] + working_dir: Option<&Path>, + ) -> Result { + // Expand environment variables in the URL. + let expanded = expand_env_vars(url); + + if let Some((scheme, path)) = split_scheme(&expanded) { + match Scheme::parse(scheme) { + // Ex) `file:///home/ferris/project/scripts/...`, `file://localhost/home/ferris/project/scripts/...`, or `file:../ferris/` + Some(Scheme::File) => { + // Strip the leading slashes, along with the `localhost` host, if present. + let path = strip_host(path); + + // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. + let path = normalize_url_path(path); + + #[cfg(feature = "non-pep508-extensions")] + if let Some(working_dir) = working_dir { + return Ok(VerbatimUrl::parse_path(path.as_ref(), working_dir) + .with_given(url.to_string())); + } + + Ok( + VerbatimUrl::parse_absolute_path(path.as_ref())? + .with_given(url.to_string()), + ) + } + // Ex) `https://download.pytorch.org/whl/torch_stable.html` + Some(_) => { + // Ex) `https://download.pytorch.org/whl/torch_stable.html` + Ok(VerbatimUrl::parse_url(expanded.as_ref())?.with_given(url.to_string())) + } + + // Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz` + _ => { + #[cfg(feature = "non-pep508-extensions")] + if let Some(working_dir) = working_dir { + return Ok(VerbatimUrl::parse_path(expanded.as_ref(), working_dir) + .with_given(url.to_string())); + } + + Ok(VerbatimUrl::parse_absolute_path(expanded.as_ref())? + .with_given(url.to_string())) + } + } + } else { + // Ex) `../editable/` + #[cfg(feature = "non-pep508-extensions")] + if let Some(working_dir) = working_dir { + return Ok(VerbatimUrl::parse_path(expanded.as_ref(), working_dir) + .with_given(url.to_string())); + } + + Ok(VerbatimUrl::parse_absolute_path(expanded.as_ref())?.with_given(url.to_string())) + } + } +} + /// An error that can occur when parsing a [`VerbatimUrl`]. -#[derive(thiserror::Error, Debug)] +#[derive(Error, Debug)] pub enum VerbatimUrlError { /// Failed to parse a URL. #[error(transparent)] diff --git a/crates/pypi-types/src/lenient_requirement.rs b/crates/pypi-types/src/lenient_requirement.rs index 69f72fb14..461730c47 100644 --- a/crates/pypi-types/src/lenient_requirement.rs +++ b/crates/pypi-types/src/lenient_requirement.rs @@ -7,7 +7,7 @@ use serde::{de, Deserialize, Deserializer, Serialize}; use tracing::warn; use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError}; -use pep508_rs::{Pep508Error, Requirement}; +use pep508_rs::{Pep508Error, Pep508Url, Requirement, VerbatimUrl}; /// Ex) `>=7.2.0<8.0.0` static MISSING_COMMA: Lazy = Lazy::new(|| Regex::new(r"(\d)([<>=~^!])").unwrap()); @@ -114,18 +114,18 @@ fn parse_with_fixups>(input: &str, type_name: &str) - /// Like [`Requirement`], but attempts to correct some common errors in user-provided requirements. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] -pub struct LenientRequirement(Requirement); +pub struct LenientRequirement(Requirement); -impl FromStr for LenientRequirement { - type Err = Pep508Error; +impl FromStr for LenientRequirement { + type Err = Pep508Error; fn from_str(input: &str) -> Result { Ok(Self(parse_with_fixups(input, "requirement")?)) } } -impl From for Requirement { - fn from(requirement: LenientRequirement) -> Self { +impl From> for Requirement { + fn from(requirement: LenientRequirement) -> Self { requirement.0 } } diff --git a/crates/pypi-types/src/metadata.rs b/crates/pypi-types/src/metadata.rs index a5653ed91..9696d5e82 100644 --- a/crates/pypi-types/src/metadata.rs +++ b/crates/pypi-types/src/metadata.rs @@ -9,7 +9,7 @@ use thiserror::Error; use tracing::warn; use pep440_rs::{Version, VersionParseError, VersionSpecifiers, VersionSpecifiersParseError}; -use pep508_rs::{Pep508Error, Requirement}; +use pep508_rs::{Pep508Error, Requirement, VerbatimUrl}; use uv_normalize::{ExtraName, InvalidNameError, PackageName}; use crate::lenient_requirement::LenientRequirement; @@ -29,7 +29,7 @@ pub struct Metadata23 { pub name: PackageName, pub version: Version, // Optional fields - pub requires_dist: Vec, + pub requires_dist: Vec>, pub requires_python: Option, pub provides_extras: Vec, } @@ -50,7 +50,7 @@ pub enum MetadataError { #[error(transparent)] Pep440Error(#[from] VersionSpecifiersParseError), #[error(transparent)] - Pep508Error(#[from] Pep508Error), + Pep508Error(#[from] Pep508Error), #[error(transparent)] InvalidName(#[from] InvalidNameError), #[error("Invalid `Metadata-Version` field: {0}")]