diff --git a/crates/distribution-types/src/error.rs b/crates/distribution-types/src/error.rs index efaefa99d..1a5078460 100644 --- a/crates/distribution-types/src/error.rs +++ b/crates/distribution-types/src/error.rs @@ -16,4 +16,7 @@ pub enum Error { #[error("Distribution not found at: {0}")] NotFound(Url), + + #[error("Unsupported scheme `{0}` on URL: {1} ({2})")] + UnsupportedScheme(String, String, String), } diff --git a/crates/distribution-types/src/index_url.rs b/crates/distribution-types/src/index_url.rs index 9270f7e09..e884a3531 100644 --- a/crates/distribution-types/src/index_url.rs +++ b/crates/distribution-types/src/index_url.rs @@ -8,7 +8,7 @@ use once_cell::sync::Lazy; use serde::{Deserialize, Serialize}; use url::Url; -use pep508_rs::split_scheme; +use pep508_rs::{split_scheme, Scheme}; use uv_fs::normalize_url_path; static PYPI_URL: Lazy<Url> = Lazy::new(|| Url::parse("https://pypi.org/simple").unwrap()); @@ -88,19 +88,29 @@ impl FromStr for FlatIndexLocation { /// - `https://download.pytorch.org/whl/torch_stable.html` fn from_str(s: &str) -> Result<Self, Self::Err> { if let Some((scheme, path)) = split_scheme(s) { - if scheme == "file" { + match Scheme::parse(scheme) { // Ex) `file:///home/ferris/project/scripts/...` or `file:../ferris/` - let path = path.strip_prefix("//").unwrap_or(path); + Some(Scheme::File) => { + let path = path.strip_prefix("//").unwrap_or(path); - // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. - let path = normalize_url_path(path); + // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
+ let path = normalize_url_path(path); + + let path = PathBuf::from(path.as_ref()); + Ok(Self::Path(path)) + } - let path = PathBuf::from(path.as_ref()); - Ok(Self::Path(path)) - } else { // Ex) `https://download.pytorch.org/whl/torch_stable.html` - let url = Url::parse(s)?; - Ok(Self::Url(url)) + Some(_) => { + let url = Url::parse(s)?; + Ok(Self::Url(url)) + } + + // Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz` + None => { + let path = PathBuf::from(s); + Ok(Self::Path(path)) + } } } else { // Ex) `../ferris/` diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index 5c291741e..2904c2bdd 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -41,7 +41,7 @@ use url::Url; use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename}; use pep440_rs::Version; -use pep508_rs::VerbatimUrl; +use pep508_rs::{Scheme, VerbatimUrl}; use uv_normalize::PackageName; pub use crate::any::*; @@ -223,56 +223,108 @@ impl Dist { /// Create a [`Dist`] for a URL-based distribution. pub fn from_url(name: PackageName, url: VerbatimUrl) -> Result<Self, Error> { - if url.scheme().starts_with("git+") { - return Ok(Self::Source(SourceDist::Git(GitSourceDist { name, url }))); - } - - if url.scheme().eq_ignore_ascii_case("file") { - // Store the canonicalized path, which also serves to validate that it exists. - let path = match url - .to_file_path() - .map_err(|()| Error::UrlFilename(url.to_url()))?
- .canonicalize() - { - Ok(path) => path, - Err(err) if err.kind() == std::io::ErrorKind::NotFound => { - return Err(Error::NotFound(url.to_url())); + match Scheme::parse(url.scheme()) { + Some(Scheme::Http | Scheme::Https) => { + if Path::new(url.path()) + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) + { + Ok(Self::Built(BuiltDist::DirectUrl(DirectUrlBuiltDist { + filename: WheelFilename::from_str(&url.filename()?)?, + url, + }))) + } else { + Ok(Self::Source(SourceDist::DirectUrl(DirectUrlSourceDist { + name, + url, + }))) } - Err(err) => return Err(err.into()), - }; + } + Some(Scheme::File) => { + // Store the canonicalized path, which also serves to validate that it exists. + let path = match url + .to_file_path() + .map_err(|()| Error::UrlFilename(url.to_url()))? + .canonicalize() + { + Ok(path) => path, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => { + return Err(Error::NotFound(url.to_url())); + } + Err(err) => return Err(err.into()), + }; - return if path - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) - { - Ok(Self::Built(BuiltDist::Path(PathBuiltDist { - filename: WheelFilename::from_str(&url.filename()?)?, - url, - path, - }))) - } else { - Ok(Self::Source(SourceDist::Path(PathSourceDist { - name, - url, - path, - editable: false, - }))) - }; - } - - if Path::new(url.path()) - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) - { - Ok(Self::Built(BuiltDist::DirectUrl(DirectUrlBuiltDist { - filename: WheelFilename::from_str(&url.filename()?)?, - url, - }))) - } else { - Ok(Self::Source(SourceDist::DirectUrl(DirectUrlSourceDist { - name, - url, - }))) + if path + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) + { + Ok(Self::Built(BuiltDist::Path(PathBuiltDist { + filename: WheelFilename::from_str(&url.filename()?)?, + url, + path, + }))) + } else { + Ok(Self::Source(SourceDist::Path(PathSourceDist { + name, + url, + path, + editable: false, + }))) + } + } + 
Some(Scheme::GitSsh | Scheme::GitHttps) => { + Ok(Self::Source(SourceDist::Git(GitSourceDist { name, url }))) + } + Some(Scheme::GitGit | Scheme::GitHttp) => Err(Error::UnsupportedScheme( + url.scheme().to_owned(), + url.verbatim().to_string(), + "insecure Git protocol".to_string(), + )), + Some(Scheme::GitFile) => Err(Error::UnsupportedScheme( + url.scheme().to_owned(), + url.verbatim().to_string(), + "local Git protocol".to_string(), + )), + Some( + Scheme::BzrHttp + | Scheme::BzrHttps + | Scheme::BzrSsh + | Scheme::BzrSftp + | Scheme::BzrFtp + | Scheme::BzrLp + | Scheme::BzrFile, + ) => Err(Error::UnsupportedScheme( + url.scheme().to_owned(), + url.verbatim().to_string(), + "Bazaar is not supported".to_string(), + )), + Some( + Scheme::HgFile + | Scheme::HgHttp + | Scheme::HgHttps + | Scheme::HgSsh + | Scheme::HgStaticHttp, + ) => Err(Error::UnsupportedScheme( + url.scheme().to_owned(), + url.verbatim().to_string(), + "Mercurial is not supported".to_string(), + )), + Some( + Scheme::SvnSsh + | Scheme::SvnHttp + | Scheme::SvnHttps + | Scheme::SvnSvn + | Scheme::SvnFile, + ) => Err(Error::UnsupportedScheme( + url.scheme().to_owned(), + url.verbatim().to_string(), + "Subversion is not supported".to_string(), + )), + None => Err(Error::UnsupportedScheme( + url.scheme().to_owned(), + url.verbatim().to_string(), + "unknown scheme".to_string(), + )), } } diff --git a/crates/pep508-rs/src/lib.rs b/crates/pep508-rs/src/lib.rs index 98f7392be..c1a8c43d2 100644 --- a/crates/pep508-rs/src/lib.rs +++ b/crates/pep508-rs/src/lib.rs @@ -46,7 +46,7 @@ use uv_fs::normalize_url_path; #[cfg(feature = "pyo3")] use uv_normalize::InvalidNameError; use uv_normalize::{ExtraName, PackageName}; -pub use verbatim_url::{split_scheme, VerbatimUrl}; +pub use verbatim_url::{split_scheme, Scheme, VerbatimUrl}; mod marker; mod verbatim_url; @@ -744,33 +744,54 @@ fn preprocess_url( len: usize, ) -> Result { let url = if let Some((scheme, path)) = split_scheme(url) { - if scheme == "file" { + 
match Scheme::parse(scheme) { // Ex) `file:///home/ferris/project/scripts/...` or `file:../editable/`. - let path = path.strip_prefix("//").unwrap_or(path); + Some(Scheme::File) => { + let path = path.strip_prefix("//").unwrap_or(path); - // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. - let path = normalize_url_path(path); + // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. + let path = normalize_url_path(path); - if let Some(working_dir) = working_dir { - VerbatimUrl::from_path(path, working_dir).with_given(url.to_string()) - } else { - VerbatimUrl::from_absolute_path(path) - .map_err(|err| Pep508Error { - message: Pep508ErrorSource::UrlError(err), - start, - len, - input: cursor.to_string(), - })? - .with_given(url.to_string()) + if let Some(working_dir) = working_dir { + VerbatimUrl::from_path(path, working_dir).with_given(url.to_string()) + } else { + VerbatimUrl::from_absolute_path(path) + .map_err(|err| Pep508Error { + message: Pep508ErrorSource::UrlError(err), + start, + len, + input: cursor.to_string(), + })? + .with_given(url.to_string()) + } } - } else { + // Ex) `https://download.pytorch.org/whl/torch_stable.html` - VerbatimUrl::from_str(url).map_err(|err| Pep508Error { - message: Pep508ErrorSource::UrlError(err), - start, - len, - input: cursor.to_string(), - })? + Some(_) => { + // Ex) `https://download.pytorch.org/whl/torch_stable.html` + VerbatimUrl::from_str(url).map_err(|err| Pep508Error { + message: Pep508ErrorSource::UrlError(err), + start, + len, + input: cursor.to_string(), + })? + } + + // Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz` + _ => { + if let Some(working_dir) = working_dir { + VerbatimUrl::from_path(url, working_dir).with_given(url.to_string()) + } else { + VerbatimUrl::from_absolute_path(url) + .map_err(|err| Pep508Error { + message: Pep508ErrorSource::UrlError(err), + start, + len, + input: cursor.to_string(), + })? 
+ .with_given(url.to_string()) + } + } } } else { // Ex) `../editable/` diff --git a/crates/pep508-rs/src/verbatim_url.rs b/crates/pep508-rs/src/verbatim_url.rs index 74494b134..f4208e053 100644 --- a/crates/pep508-rs/src/verbatim_url.rs +++ b/crates/pep508-rs/src/verbatim_url.rs @@ -251,6 +251,127 @@ pub fn split_scheme(s: &str) -> Option<(&str, &str)> { Some((scheme, rest)) } +/// A supported URL scheme for PEP 508 direct-URL requirements. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Scheme { + /// `file://...` + File, + /// `git+git://...` + GitGit, + /// `git+http://...` + GitHttp, + /// `git+file://...` + GitFile, + /// `git+ssh://...` + GitSsh, + /// `git+https://...` + GitHttps, + /// `bzr+http://...` + BzrHttp, + /// `bzr+https://...` + BzrHttps, + /// `bzr+ssh://...` + BzrSsh, + /// `bzr+sftp://...` + BzrSftp, + /// `bzr+ftp://...` + BzrFtp, + /// `bzr+lp://...` + BzrLp, + /// `bzr+file://...` + BzrFile, + /// `hg+file://...` + HgFile, + /// `hg+http://...` + HgHttp, + /// `hg+https://...` + HgHttps, + /// `hg+ssh://...` + HgSsh, + /// `hg+static-http://...` + HgStaticHttp, + /// `svn+ssh://...` + SvnSsh, + /// `svn+http://...` + SvnHttp, + /// `svn+https://...` + SvnHttps, + /// `svn+svn://...` + SvnSvn, + /// `svn+file://...` + SvnFile, + /// `http://...` + Http, + /// `https://...` + Https, +} + +impl Scheme { + /// Determine the [`Scheme`] from the given string, if possible. 
+ pub fn parse(s: &str) -> Option<Self> { + match s { + "file" => Some(Self::File), + "git+git" => Some(Self::GitGit), + "git+http" => Some(Self::GitHttp), + "git+file" => Some(Self::GitFile), + "git+ssh" => Some(Self::GitSsh), + "git+https" => Some(Self::GitHttps), + "bzr+http" => Some(Self::BzrHttp), + "bzr+https" => Some(Self::BzrHttps), + "bzr+ssh" => Some(Self::BzrSsh), + "bzr+sftp" => Some(Self::BzrSftp), + "bzr+ftp" => Some(Self::BzrFtp), + "bzr+lp" => Some(Self::BzrLp), + "bzr+file" => Some(Self::BzrFile), + "hg+file" => Some(Self::HgFile), + "hg+http" => Some(Self::HgHttp), + "hg+https" => Some(Self::HgHttps), + "hg+ssh" => Some(Self::HgSsh), + "hg+static-http" => Some(Self::HgStaticHttp), + "svn+ssh" => Some(Self::SvnSsh), + "svn+http" => Some(Self::SvnHttp), + "svn+https" => Some(Self::SvnHttps), + "svn+svn" => Some(Self::SvnSvn), + "svn+file" => Some(Self::SvnFile), + "http" => Some(Self::Http), + "https" => Some(Self::Https), + _ => None, + } + } +} + +impl std::fmt::Display for Scheme { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::File => write!(f, "file"), + Self::GitGit => write!(f, "git+git"), + Self::GitHttp => write!(f, "git+http"), + Self::GitFile => write!(f, "git+file"), + Self::GitSsh => write!(f, "git+ssh"), + Self::GitHttps => write!(f, "git+https"), + Self::BzrHttp => write!(f, "bzr+http"), + Self::BzrHttps => write!(f, "bzr+https"), + Self::BzrSsh => write!(f, "bzr+ssh"), + Self::BzrSftp => write!(f, "bzr+sftp"), + Self::BzrFtp => write!(f, "bzr+ftp"), + Self::BzrLp => write!(f, "bzr+lp"), + Self::BzrFile => write!(f, "bzr+file"), + Self::HgFile => write!(f, "hg+file"), + Self::HgHttp => write!(f, "hg+http"), + Self::HgHttps => write!(f, "hg+https"), + Self::HgSsh => write!(f, "hg+ssh"), + Self::HgStaticHttp => write!(f, "hg+static-http"), + Self::SvnSsh => write!(f, "svn+ssh"), + Self::SvnHttp => write!(f, "svn+http"), + Self::SvnHttps => write!(f, "svn+https"), + Self::SvnSvn => write!(f, 
"svn+svn"), + Self::SvnFile => write!(f, "svn+file"), + Self::Http => write!(f, "http"), + Self::Https => write!(f, "https"), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/requirements-txt/src/lib.rs b/crates/requirements-txt/src/lib.rs index aac4c3bb3..901e505a2 100644 --- a/crates/requirements-txt/src/lib.rs +++ b/crates/requirements-txt/src/lib.rs @@ -45,7 +45,9 @@ use unscanny::{Pattern, Scanner}; use url::Url; use uv_warnings::warn_user; -use pep508_rs::{split_scheme, Extras, Pep508Error, Pep508ErrorSource, Requirement, VerbatimUrl}; +use pep508_rs::{ + split_scheme, Extras, Pep508Error, Pep508ErrorSource, Requirement, Scheme, VerbatimUrl, +}; use uv_fs::{normalize_url_path, Normalized}; use uv_normalize::ExtraName; @@ -93,24 +95,39 @@ impl FindLink { /// - `https://download.pytorch.org/whl/torch_stable.html` pub fn parse(given: &str, working_dir: impl AsRef) -> Result { if let Some((scheme, path)) = split_scheme(given) { - if scheme == "file" { + match Scheme::parse(scheme) { // Ex) `file:///home/ferris/project/scripts/...` or `file:../ferris/` - let path = path.strip_prefix("//").unwrap_or(path); + Some(Scheme::File) => { + let path = path.strip_prefix("//").unwrap_or(path); - // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. - let path = normalize_url_path(path); + // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. 
+ let path = normalize_url_path(path); + + let path = PathBuf::from(path.as_ref()); + let path = if path.is_absolute() { + path + } else { + working_dir.as_ref().join(path) + }; + Ok(Self::Path(path)) + } - let path = PathBuf::from(path.as_ref()); - let path = if path.is_absolute() { - path - } else { - working_dir.as_ref().join(path) - }; - Ok(Self::Path(path)) - } else { // Ex) `https://download.pytorch.org/whl/torch_stable.html` - let url = Url::parse(given)?; - Ok(Self::Url(url)) + Some(_) => { + let url = Url::parse(given)?; + Ok(Self::Url(url)) + } + + // Ex) `C:/Users/ferris/wheel-0.42.0.tar.gz` + _ => { + let path = PathBuf::from(given); + let path = if path.is_absolute() { + path + } else { + working_dir.as_ref().join(path) + }; + Ok(Self::Path(path)) + } } } else { // Ex) `../ferris/` @@ -190,19 +207,26 @@ impl EditableRequirement { // Create a `VerbatimUrl` to represent the editable requirement. let url = if let Some((scheme, path)) = split_scheme(requirement) { - if scheme == "file" { + match Scheme::parse(scheme) { // Ex) `file:///home/ferris/project/scripts/...` or `file:../editable/` - let path = path.strip_prefix("//").unwrap_or(path); + Some(Scheme::File) => { + let path = path.strip_prefix("//").unwrap_or(path); - // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. - let path = normalize_url_path(path); + // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. 
+ let path = normalize_url_path(path); + + VerbatimUrl::from_path(path, working_dir.as_ref()) + } - VerbatimUrl::from_path(path, working_dir.as_ref()) - } else { // Ex) `https://download.pytorch.org/whl/torch_stable.html` - return Err(RequirementsTxtParserError::UnsupportedUrl( - requirement.to_string(), - )); + Some(_) => { + return Err(RequirementsTxtParserError::UnsupportedUrl( + requirement.to_string(), + )); + } + + // Ex) `C:/Users/ferris/wheel-0.42.0.tar.gz` + _ => VerbatimUrl::from_path(requirement, working_dir.as_ref()), } } else { // Ex) `../editable/` diff --git a/crates/uv/tests/pip_compile.rs b/crates/uv/tests/pip_compile.rs index bd7dbcb69..b3af6e014 100644 --- a/crates/uv/tests/pip_compile.rs +++ b/crates/uv/tests/pip_compile.rs @@ -1617,10 +1617,11 @@ fn compile_exclude_newer() -> Result<()> { #[test] fn compile_wheel_path_dependency() -> Result<()> { let context = TestContext::new("3.12"); + // Download a wheel. let response = reqwest::blocking::get("https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl")?; let flask_wheel = context.temp_dir.child("flask-3.0.0-py3-none-any.whl"); - let mut flask_wheel_file = std::fs::File::create(&flask_wheel)?; + let mut flask_wheel_file = fs::File::create(&flask_wheel)?; std::io::copy(&mut response.bytes()?.as_ref(), &mut flask_wheel_file)?; let requirements_in = context.temp_dir.child("requirements.in"); @@ -1662,7 +1663,7 @@ fn compile_wheel_path_dependency() -> Result<()> { Resolved 7 packages in [TIME] "###); - // Run the same operation, but this time with a relative path. + // Run the same operation, but this time with a relative path, omitting the `//`. 
let requirements_in = context.temp_dir.child("requirements.in"); requirements_in.write_str("flask @ file:flask-3.0.0-py3-none-any.whl")?; @@ -1694,7 +1695,7 @@ fn compile_wheel_path_dependency() -> Result<()> { "### ); - // Run the same operation, but this time with a relative path. + // Run the same operation, but this time with a relative path, including the `//`. let requirements_in = context.temp_dir.child("requirements.in"); requirements_in.write_str("flask @ file://flask-3.0.0-py3-none-any.whl")?; @@ -1726,7 +1727,7 @@ fn compile_wheel_path_dependency() -> Result<()> { "### ); - // Run the same operation, but this time with a relative path. + // Run the same operation, but this time with a relative path, exclusive of any scheme. let requirements_in = context.temp_dir.child("requirements.in"); requirements_in.write_str("flask @ ./flask-3.0.0-py3-none-any.whl")?; @@ -1758,6 +1759,45 @@ fn compile_wheel_path_dependency() -> Result<()> { "### ); + // Run the same operation, but this time with an absolute path (rather than a URL). + let requirements_in = context.temp_dir.child("requirements.in"); + requirements_in.write_str(&format!("flask @ {}", flask_wheel.path().display()))?; + + // In addition to the standard filters, remove the temporary directory from the snapshot. 
+ let filter_path = regex::escape(&flask_wheel.normalized_display().to_string()); + let filters: Vec<_> = [(filter_path.as_str(), "/[TEMP_DIR]/")] + .into_iter() + .chain(INSTA_FILTERS.to_vec()) + .collect(); + + uv_snapshot!(filters, context.compile() + .arg("requirements.in"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + # This file was autogenerated by uv via the following command: + # uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2023-11-18T12:00:00Z requirements.in + blinker==1.7.0 + # via flask + click==8.1.7 + # via flask + flask @ /[TEMP_DIR]/ + itsdangerous==2.1.2 + # via flask + jinja2==3.1.2 + # via flask + markupsafe==2.1.3 + # via + # jinja2 + # werkzeug + werkzeug==3.0.1 + # via flask + + ----- stderr ----- + Resolved 7 packages in [TIME] + "### + ); + Ok(()) } @@ -3635,3 +3675,25 @@ fn index_url_from_command_line() -> Result<()> { Ok(()) } + +/// Resolve a package from a `requirements.in` file with a dependency that uses an unsupported +/// scheme. +#[test] +fn unsupported_scheme() -> Result<()> { + let context = TestContext::new("3.12"); + let requirements_in = context.temp_dir.child("requirements.in"); + requirements_in.write_str("anyio @ bzr+https://example.com/anyio")?; + + uv_snapshot!(context.compile() + .arg("requirements.in"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Unsupported scheme `bzr+https` on URL: bzr+https://example.com/anyio (Bazaar is not supported) + "### + ); + + Ok(()) +}