Add support for absolute paths on Windows (#1725)

## Summary

The main change is that we need to have an explicit list of protocols we
_do_ support (like `https`), so that when we see a Windows absolute path
(`C:\...`), we don't treat the `C` as a protocol itself.

Closes https://github.com/astral-sh/uv/issues/1539.
This commit is contained in:
Charlie Marsh 2024-02-19 20:36:53 -05:00 committed by GitHub
parent 8f739c9b23
commit c05080a3e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 403 additions and 110 deletions

View file

@ -16,4 +16,7 @@ pub enum Error {
#[error("Distribution not found at: {0}")]
NotFound(Url),
#[error("Unsupported scheme `{0}` on URL: {1} ({2})")]
UnsupportedScheme(String, String, String),
}

View file

@ -8,7 +8,7 @@ use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use url::Url;
use pep508_rs::split_scheme;
use pep508_rs::{split_scheme, Scheme};
use uv_fs::normalize_url_path;
static PYPI_URL: Lazy<Url> = Lazy::new(|| Url::parse("https://pypi.org/simple").unwrap());
@ -88,19 +88,29 @@ impl FromStr for FlatIndexLocation {
/// - `https://download.pytorch.org/whl/torch_stable.html`
fn from_str(s: &str) -> Result<Self, Self::Err> {
if let Some((scheme, path)) = split_scheme(s) {
if scheme == "file" {
match Scheme::parse(scheme) {
// Ex) `file:///home/ferris/project/scripts/...` or `file:../ferris/`
let path = path.strip_prefix("//").unwrap_or(path);
Some(Scheme::File) => {
let path = path.strip_prefix("//").unwrap_or(path);
// Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
let path = normalize_url_path(path);
// Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
let path = normalize_url_path(path);
let path = PathBuf::from(path.as_ref());
Ok(Self::Path(path))
}
let path = PathBuf::from(path.as_ref());
Ok(Self::Path(path))
} else {
// Ex) `https://download.pytorch.org/whl/torch_stable.html`
let url = Url::parse(s)?;
Ok(Self::Url(url))
Some(_) => {
let url = Url::parse(s)?;
Ok(Self::Url(url))
}
// Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz`
None => {
let path = PathBuf::from(s);
Ok(Self::Path(path))
}
}
} else {
// Ex) `../ferris/`

View file

@ -41,7 +41,7 @@ use url::Url;
use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use pep440_rs::Version;
use pep508_rs::VerbatimUrl;
use pep508_rs::{Scheme, VerbatimUrl};
use uv_normalize::PackageName;
pub use crate::any::*;
@ -223,56 +223,108 @@ impl Dist {
/// Create a [`Dist`] for a URL-based distribution.
pub fn from_url(name: PackageName, url: VerbatimUrl) -> Result<Self, Error> {
if url.scheme().starts_with("git+") {
return Ok(Self::Source(SourceDist::Git(GitSourceDist { name, url })));
}
if url.scheme().eq_ignore_ascii_case("file") {
// Store the canonicalized path, which also serves to validate that it exists.
let path = match url
.to_file_path()
.map_err(|()| Error::UrlFilename(url.to_url()))?
.canonicalize()
{
Ok(path) => path,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
return Err(Error::NotFound(url.to_url()));
match Scheme::parse(url.scheme()) {
Some(Scheme::Http | Scheme::Https) => {
if Path::new(url.path())
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
{
Ok(Self::Built(BuiltDist::DirectUrl(DirectUrlBuiltDist {
filename: WheelFilename::from_str(&url.filename()?)?,
url,
})))
} else {
Ok(Self::Source(SourceDist::DirectUrl(DirectUrlSourceDist {
name,
url,
})))
}
Err(err) => return Err(err.into()),
};
}
Some(Scheme::File) => {
// Store the canonicalized path, which also serves to validate that it exists.
let path = match url
.to_file_path()
.map_err(|()| Error::UrlFilename(url.to_url()))?
.canonicalize()
{
Ok(path) => path,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
return Err(Error::NotFound(url.to_url()));
}
Err(err) => return Err(err.into()),
};
return if path
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
{
Ok(Self::Built(BuiltDist::Path(PathBuiltDist {
filename: WheelFilename::from_str(&url.filename()?)?,
url,
path,
})))
} else {
Ok(Self::Source(SourceDist::Path(PathSourceDist {
name,
url,
path,
editable: false,
})))
};
}
if Path::new(url.path())
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
{
Ok(Self::Built(BuiltDist::DirectUrl(DirectUrlBuiltDist {
filename: WheelFilename::from_str(&url.filename()?)?,
url,
})))
} else {
Ok(Self::Source(SourceDist::DirectUrl(DirectUrlSourceDist {
name,
url,
})))
if path
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
{
Ok(Self::Built(BuiltDist::Path(PathBuiltDist {
filename: WheelFilename::from_str(&url.filename()?)?,
url,
path,
})))
} else {
Ok(Self::Source(SourceDist::Path(PathSourceDist {
name,
url,
path,
editable: false,
})))
}
}
Some(Scheme::GitSsh | Scheme::GitHttps) => {
Ok(Self::Source(SourceDist::Git(GitSourceDist { name, url })))
}
Some(Scheme::GitGit | Scheme::GitHttp) => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
"insecure Git protocol".to_string(),
)),
Some(Scheme::GitFile) => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
"local Git protocol".to_string(),
)),
Some(
Scheme::BzrHttp
| Scheme::BzrHttps
| Scheme::BzrSsh
| Scheme::BzrSftp
| Scheme::BzrFtp
| Scheme::BzrLp
| Scheme::BzrFile,
) => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
"Bazaar is not supported".to_string(),
)),
Some(
Scheme::HgFile
| Scheme::HgHttp
| Scheme::HgHttps
| Scheme::HgSsh
| Scheme::HgStaticHttp,
) => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
"Mercurial is not supported".to_string(),
)),
Some(
Scheme::SvnSsh
| Scheme::SvnHttp
| Scheme::SvnHttps
| Scheme::SvnSvn
| Scheme::SvnFile,
) => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
"Subversion is not supported".to_string(),
)),
None => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
"unknown scheme".to_string(),
)),
}
}

View file

@ -46,7 +46,7 @@ use uv_fs::normalize_url_path;
#[cfg(feature = "pyo3")]
use uv_normalize::InvalidNameError;
use uv_normalize::{ExtraName, PackageName};
pub use verbatim_url::{split_scheme, VerbatimUrl};
pub use verbatim_url::{split_scheme, Scheme, VerbatimUrl};
mod marker;
mod verbatim_url;
@ -744,33 +744,54 @@ fn preprocess_url(
len: usize,
) -> Result<VerbatimUrl, Pep508Error> {
let url = if let Some((scheme, path)) = split_scheme(url) {
if scheme == "file" {
match Scheme::parse(scheme) {
// Ex) `file:///home/ferris/project/scripts/...` or `file:../editable/`.
let path = path.strip_prefix("//").unwrap_or(path);
Some(Scheme::File) => {
let path = path.strip_prefix("//").unwrap_or(path);
// Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
let path = normalize_url_path(path);
// Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
let path = normalize_url_path(path);
if let Some(working_dir) = working_dir {
VerbatimUrl::from_path(path, working_dir).with_given(url.to_string())
} else {
VerbatimUrl::from_absolute_path(path)
.map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(err),
start,
len,
input: cursor.to_string(),
})?
.with_given(url.to_string())
if let Some(working_dir) = working_dir {
VerbatimUrl::from_path(path, working_dir).with_given(url.to_string())
} else {
VerbatimUrl::from_absolute_path(path)
.map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(err),
start,
len,
input: cursor.to_string(),
})?
.with_given(url.to_string())
}
}
} else {
// Ex) `https://download.pytorch.org/whl/torch_stable.html`
VerbatimUrl::from_str(url).map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(err),
start,
len,
input: cursor.to_string(),
})?
Some(_) => {
// Ex) `https://download.pytorch.org/whl/torch_stable.html`
VerbatimUrl::from_str(url).map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(err),
start,
len,
input: cursor.to_string(),
})?
}
// Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz`
_ => {
if let Some(working_dir) = working_dir {
VerbatimUrl::from_path(url, working_dir).with_given(url.to_string())
} else {
VerbatimUrl::from_absolute_path(url)
.map_err(|err| Pep508Error {
message: Pep508ErrorSource::UrlError(err),
start,
len,
input: cursor.to_string(),
})?
.with_given(url.to_string())
}
}
}
} else {
// Ex) `../editable/`

View file

@ -251,6 +251,127 @@ pub fn split_scheme(s: &str) -> Option<(&str, &str)> {
Some((scheme, rest))
}
/// A URL scheme recognized in PEP 508 direct-URL requirements.
///
/// "Recognized" is deliberately broader than "supported": the enum lists every scheme that
/// should be treated as a URL scheme at all, so that anything else (for example the `C` in the
/// Windows path `C:\Users\ferris`) can be told apart from a scheme. Callers decide which of the
/// recognized schemes they actually accept.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Scheme {
    /// `file://...`
    File,
    /// `git+git://...`
    GitGit,
    /// `git+http://...`
    GitHttp,
    /// `git+file://...`
    GitFile,
    /// `git+ssh://...`
    GitSsh,
    /// `git+https://...`
    GitHttps,
    /// `bzr+http://...`
    BzrHttp,
    /// `bzr+https://...`
    BzrHttps,
    /// `bzr+ssh://...`
    BzrSsh,
    /// `bzr+sftp://...`
    BzrSftp,
    /// `bzr+ftp://...`
    BzrFtp,
    /// `bzr+lp://...`
    BzrLp,
    /// `bzr+file://...`
    BzrFile,
    /// `hg+file://...`
    HgFile,
    /// `hg+http://...`
    HgHttp,
    /// `hg+https://...`
    HgHttps,
    /// `hg+ssh://...`
    HgSsh,
    /// `hg+static-http://...`
    HgStaticHttp,
    /// `svn+ssh://...`
    SvnSsh,
    /// `svn+http://...`
    SvnHttp,
    /// `svn+https://...`
    SvnHttps,
    /// `svn+svn://...`
    SvnSvn,
    /// `svn+file://...`
    SvnFile,
    /// `http://...`
    Http,
    /// `https://...`
    Https,
}

impl Scheme {
    /// Every recognized scheme, paired with its canonical (lowercase) spelling.
    const KNOWN: &'static [(&'static str, Self)] = &[
        ("file", Self::File),
        ("git+git", Self::GitGit),
        ("git+http", Self::GitHttp),
        ("git+file", Self::GitFile),
        ("git+ssh", Self::GitSsh),
        ("git+https", Self::GitHttps),
        ("bzr+http", Self::BzrHttp),
        ("bzr+https", Self::BzrHttps),
        ("bzr+ssh", Self::BzrSsh),
        ("bzr+sftp", Self::BzrSftp),
        ("bzr+ftp", Self::BzrFtp),
        ("bzr+lp", Self::BzrLp),
        ("bzr+file", Self::BzrFile),
        ("hg+file", Self::HgFile),
        ("hg+http", Self::HgHttp),
        ("hg+https", Self::HgHttps),
        ("hg+ssh", Self::HgSsh),
        ("hg+static-http", Self::HgStaticHttp),
        ("svn+ssh", Self::SvnSsh),
        ("svn+http", Self::SvnHttp),
        ("svn+https", Self::SvnHttps),
        ("svn+svn", Self::SvnSvn),
        ("svn+file", Self::SvnFile),
        ("http", Self::Http),
        ("https", Self::Https),
    ];

    /// Determine the [`Scheme`] from the given string, if possible.
    ///
    /// `s` is the bare scheme, without the trailing `:` or `://`. Matching is ASCII
    /// case-insensitive, because URI schemes are case-insensitive: `HTTPS` and `https` both
    /// yield [`Scheme::Https`]. Returns `None` for anything that is not a recognized scheme,
    /// including the empty string and single-letter Windows drive prefixes such as `C`.
    pub fn parse(s: &str) -> Option<Self> {
        // A linear scan over a small fixed table keeps the comparison allocation-free; no
        // recognized scheme is a single letter, so drive letters can never match.
        Self::KNOWN
            .iter()
            .find(|(name, _)| name.eq_ignore_ascii_case(s))
            .map(|&(_, scheme)| scheme)
    }
}
impl std::fmt::Display for Scheme {
    /// Write the canonical lowercase spelling of the scheme (e.g. `git+https`), without any
    /// trailing `:` or `://`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the spelling first, then emit it with a single write.
        let spelling = match self {
            Self::File => "file",
            Self::GitGit => "git+git",
            Self::GitHttp => "git+http",
            Self::GitFile => "git+file",
            Self::GitSsh => "git+ssh",
            Self::GitHttps => "git+https",
            Self::BzrHttp => "bzr+http",
            Self::BzrHttps => "bzr+https",
            Self::BzrSsh => "bzr+ssh",
            Self::BzrSftp => "bzr+sftp",
            Self::BzrFtp => "bzr+ftp",
            Self::BzrLp => "bzr+lp",
            Self::BzrFile => "bzr+file",
            Self::HgFile => "hg+file",
            Self::HgHttp => "hg+http",
            Self::HgHttps => "hg+https",
            Self::HgSsh => "hg+ssh",
            Self::HgStaticHttp => "hg+static-http",
            Self::SvnSsh => "svn+ssh",
            Self::SvnHttp => "svn+http",
            Self::SvnHttps => "svn+https",
            Self::SvnSvn => "svn+svn",
            Self::SvnFile => "svn+file",
            Self::Http => "http",
            Self::Https => "https",
        };
        f.write_str(spelling)
    }
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -45,7 +45,9 @@ use unscanny::{Pattern, Scanner};
use url::Url;
use uv_warnings::warn_user;
use pep508_rs::{split_scheme, Extras, Pep508Error, Pep508ErrorSource, Requirement, VerbatimUrl};
use pep508_rs::{
split_scheme, Extras, Pep508Error, Pep508ErrorSource, Requirement, Scheme, VerbatimUrl,
};
use uv_fs::{normalize_url_path, Normalized};
use uv_normalize::ExtraName;
@ -93,24 +95,39 @@ impl FindLink {
/// - `https://download.pytorch.org/whl/torch_stable.html`
pub fn parse(given: &str, working_dir: impl AsRef<Path>) -> Result<Self, url::ParseError> {
if let Some((scheme, path)) = split_scheme(given) {
if scheme == "file" {
match Scheme::parse(scheme) {
// Ex) `file:///home/ferris/project/scripts/...` or `file:../ferris/`
let path = path.strip_prefix("//").unwrap_or(path);
Some(Scheme::File) => {
let path = path.strip_prefix("//").unwrap_or(path);
// Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
let path = normalize_url_path(path);
// Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
let path = normalize_url_path(path);
let path = PathBuf::from(path.as_ref());
let path = if path.is_absolute() {
path
} else {
working_dir.as_ref().join(path)
};
Ok(Self::Path(path))
}
let path = PathBuf::from(path.as_ref());
let path = if path.is_absolute() {
path
} else {
working_dir.as_ref().join(path)
};
Ok(Self::Path(path))
} else {
// Ex) `https://download.pytorch.org/whl/torch_stable.html`
let url = Url::parse(given)?;
Ok(Self::Url(url))
Some(_) => {
let url = Url::parse(given)?;
Ok(Self::Url(url))
}
// Ex) `C:/Users/ferris/wheel-0.42.0.tar.gz`
_ => {
let path = PathBuf::from(given);
let path = if path.is_absolute() {
path
} else {
working_dir.as_ref().join(path)
};
Ok(Self::Path(path))
}
}
} else {
// Ex) `../ferris/`
@ -190,19 +207,26 @@ impl EditableRequirement {
// Create a `VerbatimUrl` to represent the editable requirement.
let url = if let Some((scheme, path)) = split_scheme(requirement) {
if scheme == "file" {
match Scheme::parse(scheme) {
// Ex) `file:///home/ferris/project/scripts/...` or `file:../editable/`
let path = path.strip_prefix("//").unwrap_or(path);
Some(Scheme::File) => {
let path = path.strip_prefix("//").unwrap_or(path);
// Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
let path = normalize_url_path(path);
// Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
let path = normalize_url_path(path);
VerbatimUrl::from_path(path, working_dir.as_ref())
}
VerbatimUrl::from_path(path, working_dir.as_ref())
} else {
// Ex) `https://download.pytorch.org/whl/torch_stable.html`
return Err(RequirementsTxtParserError::UnsupportedUrl(
requirement.to_string(),
));
Some(_) => {
return Err(RequirementsTxtParserError::UnsupportedUrl(
requirement.to_string(),
));
}
// Ex) `C:/Users/ferris/wheel-0.42.0.tar.gz`
_ => VerbatimUrl::from_path(requirement, working_dir.as_ref()),
}
} else {
// Ex) `../editable/`

View file

@ -1617,10 +1617,11 @@ fn compile_exclude_newer() -> Result<()> {
#[test]
fn compile_wheel_path_dependency() -> Result<()> {
let context = TestContext::new("3.12");
// Download a wheel.
let response = reqwest::blocking::get("https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl")?;
let flask_wheel = context.temp_dir.child("flask-3.0.0-py3-none-any.whl");
let mut flask_wheel_file = std::fs::File::create(&flask_wheel)?;
let mut flask_wheel_file = fs::File::create(&flask_wheel)?;
std::io::copy(&mut response.bytes()?.as_ref(), &mut flask_wheel_file)?;
let requirements_in = context.temp_dir.child("requirements.in");
@ -1662,7 +1663,7 @@ fn compile_wheel_path_dependency() -> Result<()> {
Resolved 7 packages in [TIME]
"###);
// Run the same operation, but this time with a relative path.
// Run the same operation, but this time with a relative path, omitting the `//`.
let requirements_in = context.temp_dir.child("requirements.in");
requirements_in.write_str("flask @ file:flask-3.0.0-py3-none-any.whl")?;
@ -1694,7 +1695,7 @@ fn compile_wheel_path_dependency() -> Result<()> {
"###
);
// Run the same operation, but this time with a relative path.
// Run the same operation, but this time with a relative path, including the `//`.
let requirements_in = context.temp_dir.child("requirements.in");
requirements_in.write_str("flask @ file://flask-3.0.0-py3-none-any.whl")?;
@ -1726,7 +1727,7 @@ fn compile_wheel_path_dependency() -> Result<()> {
"###
);
// Run the same operation, but this time with a relative path.
// Run the same operation, but this time with a relative path, exclusive of any scheme.
let requirements_in = context.temp_dir.child("requirements.in");
requirements_in.write_str("flask @ ./flask-3.0.0-py3-none-any.whl")?;
@ -1758,6 +1759,45 @@ fn compile_wheel_path_dependency() -> Result<()> {
"###
);
// Run the same operation, but this time with an absolute path (rather than a URL).
let requirements_in = context.temp_dir.child("requirements.in");
requirements_in.write_str(&format!("flask @ {}", flask_wheel.path().display()))?;
// In addition to the standard filters, remove the temporary directory from the snapshot.
let filter_path = regex::escape(&flask_wheel.normalized_display().to_string());
let filters: Vec<_> = [(filter_path.as_str(), "/[TEMP_DIR]/")]
.into_iter()
.chain(INSTA_FILTERS.to_vec())
.collect();
uv_snapshot!(filters, context.compile()
.arg("requirements.in"), @r###"
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by uv via the following command:
# uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2023-11-18T12:00:00Z requirements.in
blinker==1.7.0
# via flask
click==8.1.7
# via flask
flask @ /[TEMP_DIR]/
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
markupsafe==2.1.3
# via
# jinja2
# werkzeug
werkzeug==3.0.1
# via flask
----- stderr -----
Resolved 7 packages in [TIME]
"###
);
Ok(())
}
@ -3635,3 +3675,25 @@ fn index_url_from_command_line() -> Result<()> {
Ok(())
}
/// Resolve a package from a `requirements.in` file with a dependency that uses an unsupported
/// scheme.
///
/// The requirement points at a `bzr+https` URL. The snapshot expects the compile command to
/// fail with exit code 2 and an "Unsupported scheme" error that names the scheme, the verbatim
/// URL, and the reason (Bazaar is not supported), with nothing written to stdout.
#[test]
fn unsupported_scheme() -> Result<()> {
let context = TestContext::new("3.12");
// Declare a direct-URL requirement that uses the Bazaar (`bzr+https`) scheme.
let requirements_in = context.temp_dir.child("requirements.in");
requirements_in.write_str("anyio @ bzr+https://example.com/anyio")?;
// Compile the requirements file and compare the outcome against the inline snapshot.
uv_snapshot!(context.compile()
.arg("requirements.in"), @r###"
success: false
exit_code: 2
----- stdout -----
----- stderr -----
error: Unsupported scheme `bzr+https` on URL: bzr+https://example.com/anyio (Bazaar is not supported)
"###
);
Ok(())
}