Ensure authentication is passed from the index url to distribution files (#1886)

Closes https://github.com/astral-sh/uv/issues/1709
Closes https://github.com/astral-sh/uv/issues/1371

Tested with the reproduction provided in #1709 which gets past the HTTP
401.

Reuses the same copying logic we introduced in
https://github.com/astral-sh/uv/pull/1874 to ensure authentication is
attached to file URLs with a realm that matches that of the index. I had
to move the authentication logic into a new crate so it could be used in
`distribution-types`.

We will want to something more robust in the future, like track all
realms with authentication in a central store and perform lookups there.
That's what `pip` does and it allows consolidation of logic like netrc
lookups. That refactor feels significant though, and I'd like to get
this fixed ASAP so this is a minimal fix.
This commit is contained in:
Zanie Blue 2024-02-22 18:10:17 -06:00 committed by GitHub
parent 3542a65fd0
commit 8a12b2ebf9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 81 additions and 36 deletions

10
Cargo.lock generated
View file

@ -909,6 +909,7 @@ dependencies = [
"tracing",
"url",
"urlencoding",
"uv-auth",
"uv-fs",
"uv-git",
"uv-normalize",
@ -4193,6 +4194,14 @@ dependencies = [
"which",
]
[[package]]
name = "uv-auth"
version = "0.0.1"
dependencies = [
"tracing",
"url",
]
[[package]]
name = "uv-build"
version = "0.0.1"
@ -4284,6 +4293,7 @@ dependencies = [
"tracing",
"url",
"urlencoding",
"uv-auth",
"uv-cache",
"uv-fs",
"uv-normalize",

View file

@ -18,6 +18,7 @@ distribution-filename = { path = "../distribution-filename", features = ["serde"
pep440_rs = { path = "../pep440-rs" }
pep508_rs = { path = "../pep508-rs" }
platform-tags = { path = "../platform-tags" }
uv-auth = { path = "../uv-auth" }
uv-fs = { path = "../uv-fs" }
uv-git = { path = "../uv-git", features = ["vendored-openssl"] }
uv-normalize = { path = "../uv-normalize" }

View file

@ -6,6 +6,8 @@ use thiserror::Error;
use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
use pypi_types::{DistInfoMetadata, Hashes, Yanked};
use url::Url;
use uv_auth::safe_copy_url_auth_to_str;
/// Error converting [`pypi_types::File`] to [`distribution_type::File`].
#[derive(Debug, Error)]
@ -39,7 +41,7 @@ pub struct File {
impl File {
/// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers
pub fn try_from(file: pypi_types::File, base: &str) -> Result<Self, FileConversionError> {
pub fn try_from(file: pypi_types::File, base: &Url) -> Result<Self, FileConversionError> {
Ok(Self {
dist_info_metadata: file.dist_info_metadata,
filename: file.filename,
@ -51,7 +53,12 @@ impl File {
size: file.size,
upload_time_utc_ms: file.upload_time.map(|dt| dt.timestamp_millis()),
url: if file.url.contains("://") {
FileLocation::AbsoluteUrl(file.url)
let url = safe_copy_url_auth_to_str(base, &file.url)
.map_err(|err| FileConversionError::Url(file.url.clone(), err))?
.map(|url| url.to_string())
.unwrap_or(file.url);
FileLocation::AbsoluteUrl(url)
} else {
FileLocation::RelativeUrl(base.to_string(), file.url)
},

View file

@ -76,6 +76,12 @@ impl BaseUrl {
pub fn as_url(&self) -> &Url {
&self.0
}
/// Convert to the underlying [`Url`].
#[must_use]
pub fn into_url(self) -> Url {
self.0
}
}
impl From<Url> for BaseUrl {

View file

@ -0,0 +1,8 @@
[package]
name = "uv-auth"
version = "0.0.1"
edition = "2021"
[dependencies]
url = { workspace = true }
tracing = { workspace = true }

View file

@ -1,26 +1,39 @@
/// HTTP authentication utilities.
use tracing::warn;
use url::Url;
/// HTTP authentication utilities.
/// Optimized version of [`safe_copy_url_auth`] which avoids parsing a string
/// into a URL unless the given URL has authentication to copy. Useful for patterns
/// where the returned URL would immediately be cast into a string.
///
/// Returns [`Err`] if there is authentication to copy and `new_url` is not a valid URL.
/// Returns [`None`] if there is no authentication to copy.
pub fn safe_copy_url_auth_to_str(
trusted_url: &Url,
new_url: &str,
) -> Result<Option<Url>, url::ParseError> {
if trusted_url.username().is_empty() && trusted_url.password().is_none() {
return Ok(None);
}
let new_url = Url::parse(new_url)?;
Ok(Some(safe_copy_url_auth(trusted_url, new_url)))
}
/// Copy authentication from one URL to another URL if applicable.
///
/// See [`should_retain_auth`] for details on when authentication is retained.
#[must_use]
pub(crate) fn safe_copy_auth(request_url: &Url, mut response_url: Url) -> Url {
if should_retain_auth(request_url, &response_url) {
response_url
.set_username(request_url.username())
.unwrap_or_else(|_| {
warn!("Failed to transfer username to response URL: {response_url}")
});
response_url
.set_password(request_url.password())
.unwrap_or_else(|_| {
warn!("Failed to transfer password to response URL: {response_url}")
});
pub fn safe_copy_url_auth(trusted_url: &Url, mut new_url: Url) -> Url {
if should_retain_auth(trusted_url, &new_url) {
new_url
.set_username(trusted_url.username())
.unwrap_or_else(|_| warn!("Failed to transfer username to response URL: {new_url}"));
new_url
.set_password(trusted_url.password())
.unwrap_or_else(|_| warn!("Failed to transfer password to response URL: {new_url}"));
}
response_url
new_url
}
/// Determine if authentication information should be retained on a new URL.
@ -28,7 +41,7 @@ pub(crate) fn safe_copy_auth(request_url: &Url, mut response_url: Url) -> Url {
///
/// <https://datatracker.ietf.org/doc/html/rfc7235#section-2.2>
/// <https://datatracker.ietf.org/doc/html/rfc7230#section-5.5>
fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool {
fn should_retain_auth(trusted_url: &Url, new_url: &Url) -> bool {
// The "scheme" and "authority" components must match to retain authentication
// The "authority", is composed of the host and port.
@ -37,12 +50,12 @@ fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool {
// Note some clients such as Python's `requests` library allow an upgrade
// from `http` to `https` but this is not spec-compliant.
// <https://github.com/pypa/pip/blob/75f54cae9271179b8cc80435f92336c97e349f9d/src/pip/_vendor/requests/sessions.py#L133-L136>
if request_url.scheme() != response_url.scheme() {
if trusted_url.scheme() != new_url.scheme() {
return false;
}
// The host must always be an exact match.
if request_url.host() != response_url.host() {
if trusted_url.host() != new_url.host() {
return false;
}
@ -50,7 +63,7 @@ fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool {
// The port is only allowed to differ if it it matches the "default port" for the scheme.
// However, `reqwest` sets the `port` to `None` if it matches the default port so we do
// not need any special handling here.
if request_url.port() != response_url.port() {
if trusted_url.port() != new_url.port() {
return false;
}
@ -61,7 +74,7 @@ fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool {
mod tests {
use url::{ParseError, Url};
use crate::auth::should_retain_auth;
use crate::should_retain_auth;
#[test]
fn test_should_retain_auth() -> Result<(), ParseError> {

View file

@ -11,6 +11,7 @@ install-wheel-rs = { path = "../install-wheel-rs" }
pep440_rs = { path = "../pep440-rs" }
pep508_rs = { path = "../pep508-rs" }
platform-tags = { path = "../platform-tags" }
uv-auth = { path = "../uv-auth" }
uv-cache = { path = "../uv-cache" }
uv-fs = { path = "../uv-fs", features = ["tokio"] }
uv-normalize = { path = "../uv-normalize" }

View file

@ -16,10 +16,10 @@ use distribution_types::{
use pep440_rs::Version;
use platform_tags::Tags;
use pypi_types::{Hashes, Yanked};
use uv_auth::safe_copy_url_auth;
use uv_cache::{Cache, CacheBucket};
use uv_normalize::PackageName;
use crate::auth::safe_copy_auth;
use crate::cached_client::{CacheControl, CachedClientError};
use crate::html::SimpleHtml;
use crate::{Connectivity, Error, ErrorKind, RegistryClient};
@ -156,16 +156,17 @@ impl<'a> FlatIndexClient<'a> {
async {
// Use the response URL, rather than the request URL, as the base for relative URLs.
// This ensures that we handle redirects and other URL transformations correctly.
let url = safe_copy_auth(url, response.url().clone());
let url = safe_copy_url_auth(url, response.url().clone());
let text = response.text().await.map_err(ErrorKind::RequestError)?;
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;
let base = safe_copy_url_auth(&url, base.into_url());
let files: Vec<File> = files
.into_iter()
.filter_map(|file| {
match File::try_from(file, base.as_url().as_str()) {
match File::try_from(file, &base) {
Ok(file) => Some(file),
Err(err) => {
// Ignore files with unparsable version specifiers.

View file

@ -7,7 +7,6 @@ pub use registry_client::{
};
pub use rkyvutil::OwnedArchive;
mod auth;
mod cached_client;
mod error;
mod flat_index;

View file

@ -21,11 +21,11 @@ use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Nam
use install_wheel_rs::{find_dist_info, is_metadata_entry};
use pep440_rs::Version;
use pypi_types::{Metadata21, SimpleJson};
use uv_auth::safe_copy_url_auth;
use uv_cache::{Cache, CacheBucket, WheelCache};
use uv_normalize::PackageName;
use uv_warnings::warn_user_once;
use crate::auth::safe_copy_auth;
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::middleware::OfflineMiddleware;
@ -249,7 +249,7 @@ impl RegistryClient {
async {
// Use the response URL, rather than the request URL, as the base for relative URLs.
// This ensures that we handle redirects and other URL transformations correctly.
let url = safe_copy_auth(&url, response.url().clone());
let url = safe_copy_url_auth(&url, response.url().clone());
let content_type = response
.headers()
@ -271,17 +271,16 @@ impl RegistryClient {
let bytes = response.bytes().await.map_err(ErrorKind::RequestError)?;
let data: SimpleJson = serde_json::from_slice(bytes.as_ref())
.map_err(|err| Error::from_json_err(err, url.clone()))?;
let metadata =
SimpleMetadata::from_files(data.files, package_name, url.as_str());
metadata
SimpleMetadata::from_files(data.files, package_name, &url)
}
MediaType::Html => {
let text = response.text().await.map_err(ErrorKind::RequestError)?;
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;
let metadata =
SimpleMetadata::from_files(files, package_name, base.as_url().as_str());
metadata
let base = safe_copy_url_auth(&url, base.into_url());
SimpleMetadata::from_files(files, package_name, &base)
}
};
OwnedArchive::from_unarchived(&unarchived)
@ -670,7 +669,7 @@ impl SimpleMetadata {
self.0.iter()
}
fn from_files(files: Vec<pypi_types::File>, package_name: &PackageName, base: &str) -> Self {
fn from_files(files: Vec<pypi_types::File>, package_name: &PackageName, base: &Url) -> Self {
let mut map: BTreeMap<Version, VersionFiles> = BTreeMap::default();
// Group the distributions by version and kind
@ -810,11 +809,11 @@ mod tests {
}
"#;
let data: SimpleJson = serde_json::from_str(response).unwrap();
let base = "https://pypi.org/simple/pyflyby/";
let base = Url::parse("https://pypi.org/simple/pyflyby/").unwrap();
let simple_metadata = SimpleMetadata::from_files(
data.files,
&PackageName::from_str("pyflyby").unwrap(),
base,
&base,
);
let versions: Vec<String> = simple_metadata
.iter()