Preserve verbatim URLs for --find-links (#4838)

Also gets rid of a lot of duplicated logic for `--find-links`.

Closes https://github.com/astral-sh/uv/issues/4797
This commit is contained in:
Charlie Marsh 2024-07-05 17:57:40 -04:00 committed by GitHub
parent c0ca0b02b8
commit 32ea636585
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 171 additions and 214 deletions

View file

@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::fmt::{Display, Formatter};
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::str::FromStr;
use itertools::Either;
@ -9,8 +9,7 @@ use once_cell::sync::Lazy;
use thiserror::Error;
use url::{ParseError, Url};
use pep508_rs::{expand_env_vars, split_scheme, strip_host, Scheme, VerbatimUrl, VerbatimUrlError};
use uv_fs::normalize_url_path;
use pep508_rs::{VerbatimUrl, VerbatimUrlError};
use crate::Verbatim;
@ -91,6 +90,15 @@ impl Verbatim for IndexUrl {
}
}
impl From<FlatIndexLocation> for IndexUrl {
fn from(location: FlatIndexLocation) -> Self {
match location {
FlatIndexLocation::Path(url) => Self::Path(url),
FlatIndexLocation::Url(url) => Self::Url(url),
}
}
}
/// An error that can occur when parsing an [`IndexUrl`].
#[derive(Error, Debug)]
pub enum IndexUrlError {
@ -173,8 +181,8 @@ impl Deref for IndexUrl {
/// Also known as `--find-links`.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub enum FlatIndexLocation {
Path(PathBuf),
Url(Url),
Path(VerbatimUrl),
Url(VerbatimUrl),
}
#[cfg(feature = "schemars")]
@ -197,6 +205,60 @@ impl schemars::JsonSchema for FlatIndexLocation {
}
}
impl FlatIndexLocation {
/// Return the raw URL for the `--find-links` index.
pub fn url(&self) -> &Url {
match self {
Self::Url(url) => url.raw(),
Self::Path(url) => url.raw(),
}
}
/// Return the redacted URL for the `--find-links` index, omitting any sensitive credentials.
pub fn redacted(&self) -> Cow<'_, Url> {
let url = self.url();
if url.username().is_empty() && url.password().is_none() {
Cow::Borrowed(url)
} else {
let mut url = url.clone();
let _ = url.set_username("");
let _ = url.set_password(None);
Cow::Owned(url)
}
}
}
impl Display for FlatIndexLocation {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::Url(url) => Display::fmt(url, f),
Self::Path(url) => Display::fmt(url, f),
}
}
}
impl Verbatim for FlatIndexLocation {
fn verbatim(&self) -> Cow<'_, str> {
match self {
Self::Url(url) => url.verbatim(),
Self::Path(url) => url.verbatim(),
}
}
}
impl FromStr for FlatIndexLocation {
type Err = IndexUrlError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let url = if let Ok(path) = Path::new(s).canonicalize() {
VerbatimUrl::from_path(path)?
} else {
VerbatimUrl::parse_url(s)?
};
Ok(Self::from(url.with_given(s)))
}
}
impl serde::ser::Serialize for FlatIndexLocation {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@ -216,60 +278,12 @@ impl<'de> serde::de::Deserialize<'de> for FlatIndexLocation {
}
}
impl FromStr for FlatIndexLocation {
type Err = url::ParseError;
/// Parse a raw string for a `--find-links` entry, which could be a URL or a local path.
///
/// For example:
/// - `file:///home/ferris/project/scripts/...`
/// - `file:../ferris/`
/// - `../ferris/`
/// - `https://download.pytorch.org/whl/torch_stable.html`
fn from_str(s: &str) -> Result<Self, Self::Err> {
// Expand environment variables.
let expanded = expand_env_vars(s);
// Parse the expanded path.
if let Some((scheme, path)) = split_scheme(&expanded) {
match Scheme::parse(scheme) {
// Ex) `file:///home/ferris/project/scripts/...`, `file://localhost/home/ferris/project/scripts/...`, or `file:../ferris/`
Some(Scheme::File) => {
// Strip the leading slashes, along with the `localhost` host, if present.
let path = strip_host(path);
// Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
let path = normalize_url_path(path);
let path = PathBuf::from(path.as_ref());
Ok(Self::Path(path))
}
// Ex) `https://download.pytorch.org/whl/torch_stable.html`
Some(_) => {
let url = Url::parse(expanded.as_ref())?;
Ok(Self::Url(url))
}
// Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz`
None => {
let path = PathBuf::from(expanded.as_ref());
Ok(Self::Path(path))
}
}
impl From<VerbatimUrl> for FlatIndexLocation {
fn from(url: VerbatimUrl) -> Self {
if url.scheme() == "file" {
Self::Path(url)
} else {
// Ex) `../ferris/`
let path = PathBuf::from(expanded.as_ref());
Ok(Self::Path(path))
}
}
}
impl Display for FlatIndexLocation {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::Path(path) => Display::fmt(&path.display(), f),
Self::Url(url) => Display::fmt(url, f),
Self::Url(url)
}
}
}
@ -387,7 +401,7 @@ impl<'a> IndexLocations {
.map(IndexUrl::url)
.chain(self.flat_index.iter().filter_map(|index| match index {
FlatIndexLocation::Path(_) => None,
FlatIndexLocation::Url(url) => Some(url),
FlatIndexLocation::Url(url) => Some(url.raw()),
}))
}
}
@ -459,32 +473,3 @@ impl From<IndexLocations> for IndexUrls {
}
}
}
#[cfg(test)]
#[cfg(unix)]
mod test {
use super::*;
#[test]
fn parse_find_links() {
assert_eq!(
FlatIndexLocation::from_str("file:///home/ferris/project/scripts/...").unwrap(),
FlatIndexLocation::Path(PathBuf::from("/home/ferris/project/scripts/..."))
);
assert_eq!(
FlatIndexLocation::from_str("file:../ferris/").unwrap(),
FlatIndexLocation::Path(PathBuf::from("../ferris/"))
);
assert_eq!(
FlatIndexLocation::from_str("../ferris/").unwrap(),
FlatIndexLocation::Path(PathBuf::from("../ferris/"))
);
assert_eq!(
FlatIndexLocation::from_str("https://download.pytorch.org/whl/torch_stable.html")
.unwrap(),
FlatIndexLocation::Url(
Url::parse("https://download.pytorch.org/whl/torch_stable.html").unwrap()
)
);
}
}

View file

@ -45,15 +45,13 @@ use unscanny::{Pattern, Scanner};
use url::Url;
use distribution_types::{UnresolvedRequirement, UnresolvedRequirementSpecification};
use pep508_rs::{
expand_env_vars, split_scheme, strip_host, Pep508Error, RequirementOrigin, Scheme, VerbatimUrl,
};
use pep508_rs::{expand_env_vars, Pep508Error, RequirementOrigin, VerbatimUrl};
use pypi_types::{Requirement, VerbatimParsedUrl};
#[cfg(feature = "http")]
use uv_client::BaseClient;
use uv_client::BaseClientBuilder;
use uv_configuration::{NoBinary, NoBuild, PackageNameSpecifier};
use uv_fs::{normalize_url_path, Simplified};
use uv_fs::Simplified;
use uv_warnings::warn_user;
use crate::requirement::EditableError;
@ -84,7 +82,7 @@ enum RequirementsTxtStatement {
/// `--extra-index-url`
ExtraIndexUrl(VerbatimUrl),
/// `--find-links`
FindLinks(FindLink),
FindLinks(VerbatimUrl),
/// `--no-index`
NoIndex,
/// `--no-binary`
@ -93,73 +91,6 @@ enum RequirementsTxtStatement {
OnlyBinary(NoBuild),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FindLink {
Path(PathBuf),
Url(Url),
}
impl FindLink {
/// Parse a raw string for a `--find-links` entry, which could be a URL or a local path.
///
/// For example:
/// - `file:///home/ferris/project/scripts/...`
/// - `file:../ferris/`
/// - `../ferris/`
/// - `https://download.pytorch.org/whl/torch_stable.html`
pub fn parse(given: &str, working_dir: impl AsRef<Path>) -> Result<Self, url::ParseError> {
// Expand environment variables.
let expanded = expand_env_vars(given);
if let Some((scheme, path)) = split_scheme(&expanded) {
match Scheme::parse(scheme) {
// Ex) `file:///home/ferris/project/scripts/...`, `file://localhost/home/ferris/project/scripts/...`, or `file:../ferris/`
Some(Scheme::File) => {
// Strip the leading slashes, along with the `localhost` host, if present.
let path = strip_host(path);
// Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`.
let path = normalize_url_path(path);
let path = PathBuf::from(path.as_ref());
let path = if path.is_absolute() {
path
} else {
working_dir.as_ref().join(path)
};
Ok(Self::Path(path))
}
// Ex) `https://download.pytorch.org/whl/torch_stable.html`
Some(_) => {
let url = Url::parse(&expanded)?;
Ok(Self::Url(url))
}
// Ex) `C:/Users/ferris/wheel-0.42.0.tar.gz`
_ => {
let path = PathBuf::from(expanded.as_ref());
let path = if path.is_absolute() {
path
} else {
working_dir.as_ref().join(path)
};
Ok(Self::Path(path))
}
}
} else {
// Ex) `../ferris/`
let path = PathBuf::from(expanded.as_ref());
let path = if path.is_absolute() {
path
} else {
working_dir.as_ref().join(path)
};
Ok(Self::Path(path))
}
}
}
/// A [Requirement] with additional metadata from the `requirements.txt`, currently only hashes but in
/// the future also editable and similar information.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
@ -212,7 +143,7 @@ pub struct RequirementsTxt {
/// The extra index URLs, specified with `--extra-index-url`.
pub extra_index_urls: Vec<VerbatimUrl>,
/// The find links locations, specified with `--find-links`.
pub find_links: Vec<FindLink>,
pub find_links: Vec<VerbatimUrl>,
/// Whether to ignore the index, specified with `--no-index`.
pub no_index: bool,
/// Whether to disallow wheels, specified with `--no-binary`.
@ -445,8 +376,8 @@ impl RequirementsTxt {
RequirementsTxtStatement::ExtraIndexUrl(url) => {
data.extra_index_urls.push(url);
}
RequirementsTxtStatement::FindLinks(path_or_url) => {
data.find_links.push(path_or_url);
RequirementsTxtStatement::FindLinks(url) => {
data.find_links.push(url);
}
RequirementsTxtStatement::NoIndex => {
data.no_index = true;
@ -592,16 +523,26 @@ fn parse_entry(
} else if s.eat_if("--no-index") {
RequirementsTxtStatement::NoIndex
} else if s.eat_if("--find-links") || s.eat_if("-f") {
let path_or_url = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?;
let path_or_url = FindLink::parse(path_or_url, working_dir).map_err(|err| {
let given = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?;
let expanded = expand_env_vars(given);
let url = if let Ok(path) = Path::new(expanded.as_ref()).canonicalize() {
VerbatimUrl::from_path(path).map_err(|err| RequirementsTxtParserError::VerbatimUrl {
source: err,
url: given.to_string(),
start,
end: s.cursor(),
})?
} else {
VerbatimUrl::parse_url(expanded.as_ref()).map_err(|err| {
RequirementsTxtParserError::Url {
source: err,
url: path_or_url.to_string(),
url: given.to_string(),
start,
end: s.cursor(),
}
})?;
RequirementsTxtStatement::FindLinks(path_or_url)
})?
};
RequirementsTxtStatement::FindLinks(url.with_given(given))
} else if s.eat_if("--no-binary") {
let given = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?;
let specifier = PackageNameSpecifier::from_str(given).map_err(|err| {

View file

@ -1,4 +1,4 @@
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use futures::{FutureExt, StreamExt};
use reqwest::Response;
@ -7,7 +7,6 @@ use url::Url;
use distribution_filename::DistFilename;
use distribution_types::{File, FileLocation, FlatIndexLocation, IndexUrl};
use pep508_rs::VerbatimUrl;
use uv_cache::{Cache, CacheBucket};
use crate::cached_client::{CacheControl, CachedClientError};
@ -16,6 +15,9 @@ use crate::{Connectivity, Error, ErrorKind, RegistryClient};
#[derive(Debug, thiserror::Error)]
pub enum FlatIndexError {
#[error("Expected a file URL, but received: {0}")]
NonFileUrl(Url),
#[error("Failed to read `--find-links` directory: {0}")]
FindLinksDirectory(PathBuf, #[source] FindLinksDirectoryError),
@ -97,12 +99,17 @@ impl<'a> FlatIndexClient<'a> {
let mut fetches = futures::stream::iter(indexes)
.map(|index| async move {
let entries = match index {
FlatIndexLocation::Path(path) => Self::read_from_directory(path)
.map_err(|err| FlatIndexError::FindLinksDirectory(path.clone(), err))?,
FlatIndexLocation::Path(url) => {
let path = url
.to_file_path()
.map_err(|()| FlatIndexError::NonFileUrl(url.to_url()))?;
Self::read_from_directory(&path, index)
.map_err(|err| FlatIndexError::FindLinksDirectory(path.clone(), err))?
}
FlatIndexLocation::Url(url) => self
.read_from_url(url)
.read_from_url(url, index)
.await
.map_err(|err| FlatIndexError::FindLinksUrl(url.clone(), err))?,
.map_err(|err| FlatIndexError::FindLinksUrl(url.to_url(), err))?,
};
if entries.is_empty() {
warn!("No packages found in `--find-links` entry: {}", index);
@ -126,7 +133,11 @@ impl<'a> FlatIndexClient<'a> {
}
/// Read a flat remote index from a `--find-links` URL.
async fn read_from_url(&self, url: &Url) -> Result<FlatIndexEntries, Error> {
async fn read_from_url(
&self,
url: &Url,
flat_index: &FlatIndexLocation,
) -> Result<FlatIndexEntries, Error> {
let cache_entry = self.cache.entry(
CacheBucket::FlatIndex,
"html",
@ -189,14 +200,13 @@ impl<'a> FlatIndexClient<'a> {
.await;
match response {
Ok(files) => {
let index_url = IndexUrl::Url(VerbatimUrl::from_url(url.clone()));
let files = files
.into_iter()
.filter_map(|file| {
Some((
DistFilename::try_from_normalized_filename(&file.filename)?,
file,
index_url.clone(),
IndexUrl::from(flat_index.clone()),
))
})
.collect();
@ -210,11 +220,10 @@ impl<'a> FlatIndexClient<'a> {
}
/// Read a flat remote index from a `--find-links` directory.
fn read_from_directory(path: &PathBuf) -> Result<FlatIndexEntries, FindLinksDirectoryError> {
// Absolute paths are required for the URL conversion.
let path = fs_err::canonicalize(path)?;
let index_url = IndexUrl::Path(VerbatimUrl::from_path(&path)?);
fn read_from_directory(
path: &Path,
flat_index: &FlatIndexLocation,
) -> Result<FlatIndexEntries, FindLinksDirectoryError> {
let mut dists = Vec::new();
for entry in fs_err::read_dir(path)? {
let entry = entry?;
@ -249,7 +258,7 @@ impl<'a> FlatIndexClient<'a> {
);
continue;
};
dists.push((filename, file, index_url.clone()));
dists.push((filename, file, IndexUrl::from(flat_index.clone())));
}
Ok(FlatIndexEntries::from_entries(dists))
}

View file

@ -3,9 +3,8 @@ use std::collections::BTreeMap;
use rustc_hash::FxHashMap;
use distribution_types::{CachedRegistryDist, FlatIndexLocation, Hashed, IndexLocations, IndexUrl};
use distribution_types::{CachedRegistryDist, Hashed, IndexLocations, IndexUrl};
use pep440_rs::Version;
use pep508_rs::VerbatimUrl;
use platform_tags::Tags;
use uv_cache::{Cache, CacheBucket, WheelCache};
use uv_fs::{directories, files, symlinks};
@ -91,15 +90,7 @@ impl<'a> RegistryWheelIndex<'a> {
// Collect into owned `IndexUrl`.
let flat_index_urls: Vec<IndexUrl> = index_locations
.flat_index()
.filter_map(|flat_index| match flat_index {
FlatIndexLocation::Path(path) => {
let path = fs_err::canonicalize(path).ok()?;
Some(IndexUrl::Path(VerbatimUrl::from_path(path).ok()?))
}
FlatIndexLocation::Url(url) => {
Some(IndexUrl::Url(VerbatimUrl::from_url(url.clone())))
}
})
.map(|flat_index| IndexUrl::from(flat_index.clone()))
.collect();
for index_url in index_locations.indexes().chain(flat_index_urls.iter()) {

View file

@ -40,7 +40,7 @@ use distribution_types::{
use pep508_rs::{UnnamedRequirement, UnnamedRequirementUrl};
use pypi_types::Requirement;
use pypi_types::VerbatimParsedUrl;
use requirements_txt::{FindLink, RequirementsTxt, RequirementsTxtRequirement};
use requirements_txt::{RequirementsTxt, RequirementsTxtRequirement};
use uv_client::BaseClientBuilder;
use uv_configuration::{NoBinary, NoBuild};
use uv_distribution::pyproject::PyProjectToml;
@ -143,10 +143,7 @@ impl RequirementsSpecification {
find_links: requirements_txt
.find_links
.into_iter()
.map(|link| match link {
FindLink::Url(url) => FlatIndexLocation::Url(url),
FindLink::Path(path) => FlatIndexLocation::Path(path),
})
.map(FlatIndexLocation::from)
.collect(),
no_binary: requirements_txt.no_binary,
no_build: requirements_txt.only_binary,

View file

@ -411,7 +411,7 @@ pub(crate) async fn pip_compile(
// If necessary, include the `--find-links` locations.
if include_find_links {
for flat_index in index_locations.flat_index() {
writeln!(writer, "--find-links {flat_index}")?;
writeln!(writer, "--find-links {}", flat_index.verbatim())?;
wrote_preamble = true;
}
}

View file

@ -5083,6 +5083,35 @@ fn emit_find_links() -> Result<()> {
Ok(())
}
/// Emit the `--find-links` locations using a relative path in a requirements file. The verbatim
/// path should be preserved.
#[test]
fn emit_find_links_relative() -> Result<()> {
let context = TestContext::new("3.12");
let requirements_in = context.temp_dir.child("requirements.in");
requirements_in.write_str("-f ./\niniconfig")?;
uv_snapshot!(context.pip_compile()
.arg("requirements.in")
.arg("--emit-find-links"), @r###"
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by uv via the following command:
# uv pip compile --cache-dir [CACHE_DIR] requirements.in --emit-find-links
--find-links ./
iniconfig==2.0.0
# via -r requirements.in
----- stderr -----
Resolved 1 package in [TIME]
"###
);
Ok(())
}
/// Emit the `--no-binary` and `--only-binary` options.
#[test]
fn emit_build_options() -> Result<()> {

View file

@ -1287,7 +1287,8 @@ fn resolve_find_links() -> anyhow::Result<()> {
extra_index: [],
flat_index: [
Url(
Url {
VerbatimUrl {
url: Url {
scheme: "https",
cannot_be_a_base: false,
username: "",
@ -1302,6 +1303,10 @@ fn resolve_find_links() -> anyhow::Result<()> {
query: None,
fragment: None,
},
given: Some(
"https://download.pytorch.org/whl/torch_stable.html",
),
},
),
],
no_index: true,