Implement RFC 7231 compliant relative URI and fragment handling in redirects (#13050)

This PR restores #13041 and integrates two PRs from @zanieb:
* #13038
* #13040

It also adds tests for relative URI and fragment handling.

Closes #13037.

---------

Co-authored-by: Zanie Blue <contact@zanie.dev>
This commit is contained in:
John Mumm 2025-04-28 09:07:06 +02:00 committed by GitHub
parent 576a4ae3a7
commit 4ee4a8861e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 696 additions and 37 deletions

View file

@ -64,3 +64,4 @@ hyper = { version = "1.4.1", features = ["server", "http1"] }
hyper-util = { version = "0.1.8", features = ["tokio"] }
insta = { version = "1.40.0", features = ["filters", "json", "redactions"] }
tokio = { workspace = true }
wiremock = { workspace = true }

View file

@ -6,14 +6,17 @@ use std::sync::Arc;
use std::time::Duration;
use std::{env, iter};
use anyhow::anyhow;
use http::{HeaderMap, HeaderName, HeaderValue, StatusCode};
use itertools::Itertools;
use reqwest::{Client, ClientBuilder, Proxy, Response};
use reqwest::{multipart, Client, ClientBuilder, IntoUrl, Proxy, Request, Response};
use reqwest_middleware::{ClientWithMiddleware, Middleware};
use reqwest_retry::policies::ExponentialBackoff;
use reqwest_retry::{
DefaultRetryableStrategy, RetryTransientMiddleware, Retryable, RetryableStrategy,
};
use tracing::{debug, trace};
use url::ParseError;
use url::Url;
use uv_auth::{AuthMiddleware, UrlAuthPolicies};
@ -60,6 +63,24 @@ pub struct BaseClientBuilder<'a> {
default_timeout: Duration,
extra_middleware: Option<ExtraMiddleware>,
proxies: Vec<Proxy>,
redirect_policy: RedirectPolicy,
}
/// The policy for handling redirects.
#[derive(Debug, Default, Clone, Copy)]
pub enum RedirectPolicy {
#[default]
BypassMiddleware,
RetriggerMiddleware,
}
impl RedirectPolicy {
pub fn reqwest_policy(self) -> reqwest::redirect::Policy {
match self {
RedirectPolicy::BypassMiddleware => reqwest::redirect::Policy::default(),
RedirectPolicy::RetriggerMiddleware => reqwest::redirect::Policy::none(),
}
}
}
/// A list of user-defined middlewares to be applied to the client.
@ -95,6 +116,7 @@ impl BaseClientBuilder<'_> {
default_timeout: Duration::from_secs(30),
extra_middleware: None,
proxies: vec![],
redirect_policy: RedirectPolicy::default(),
}
}
}
@ -172,6 +194,12 @@ impl<'a> BaseClientBuilder<'a> {
self
}
#[must_use]
pub fn redirect(mut self, policy: RedirectPolicy) -> Self {
self.redirect_policy = policy;
self
}
pub fn is_offline(&self) -> bool {
matches!(self.connectivity, Connectivity::Offline)
}
@ -228,6 +256,7 @@ impl<'a> BaseClientBuilder<'a> {
timeout,
ssl_cert_file_exists,
Security::Secure,
self.redirect_policy,
);
// Create an insecure client that accepts invalid certificates.
@ -236,11 +265,18 @@ impl<'a> BaseClientBuilder<'a> {
timeout,
ssl_cert_file_exists,
Security::Insecure,
self.redirect_policy,
);
// Wrap in any relevant middleware and handle connectivity.
let client = self.apply_middleware(raw_client.clone());
let dangerous_client = self.apply_middleware(raw_dangerous_client.clone());
let client = RedirectClientWithMiddleware {
client: self.apply_middleware(raw_client.clone()),
redirect_policy: self.redirect_policy,
};
let dangerous_client = RedirectClientWithMiddleware {
client: self.apply_middleware(raw_dangerous_client.clone()),
redirect_policy: self.redirect_policy,
};
BaseClient {
connectivity: self.connectivity,
@ -257,8 +293,14 @@ impl<'a> BaseClientBuilder<'a> {
/// Share the underlying client between two different middleware configurations.
pub fn wrap_existing(&self, existing: &BaseClient) -> BaseClient {
// Wrap in any relevant middleware and handle connectivity.
let client = self.apply_middleware(existing.raw_client.clone());
let dangerous_client = self.apply_middleware(existing.raw_dangerous_client.clone());
let client = RedirectClientWithMiddleware {
client: self.apply_middleware(existing.raw_client.clone()),
redirect_policy: self.redirect_policy,
};
let dangerous_client = RedirectClientWithMiddleware {
client: self.apply_middleware(existing.raw_dangerous_client.clone()),
redirect_policy: self.redirect_policy,
};
BaseClient {
connectivity: self.connectivity,
@ -278,6 +320,7 @@ impl<'a> BaseClientBuilder<'a> {
timeout: Duration,
ssl_cert_file_exists: bool,
security: Security,
redirect_policy: RedirectPolicy,
) -> Client {
// Configure the builder.
let client_builder = ClientBuilder::new()
@ -285,7 +328,8 @@ impl<'a> BaseClientBuilder<'a> {
.user_agent(user_agent)
.pool_max_idle_per_host(20)
.read_timeout(timeout)
.tls_built_in_root_certs(false);
.tls_built_in_root_certs(false)
.redirect(redirect_policy.reqwest_policy());
// If necessary, accept invalid certificates.
let client_builder = match security {
@ -382,9 +426,9 @@ impl<'a> BaseClientBuilder<'a> {
#[derive(Debug, Clone)]
pub struct BaseClient {
/// The underlying HTTP client that enforces valid certificates.
client: ClientWithMiddleware,
client: RedirectClientWithMiddleware,
/// The underlying HTTP client that accepts invalid certificates.
dangerous_client: ClientWithMiddleware,
dangerous_client: RedirectClientWithMiddleware,
/// The HTTP client without middleware.
raw_client: Client,
/// The HTTP client that accepts invalid certificates without middleware.
@ -409,7 +453,7 @@ enum Security {
impl BaseClient {
/// Selects the appropriate client based on the host's trustworthiness.
pub fn for_host(&self, url: &Url) -> &ClientWithMiddleware {
pub fn for_host(&self, url: &Url) -> &RedirectClientWithMiddleware {
if self.disable_ssl(url) {
&self.dangerous_client
} else {
@ -417,6 +461,12 @@ impl BaseClient {
}
}
/// Executes a request, applying redirect policy.
pub async fn execute(&self, req: Request) -> reqwest_middleware::Result<Response> {
let client = self.for_host(req.url());
client.execute(req).await
}
/// Returns `true` if the host is trusted to use the insecure client.
pub fn disable_ssl(&self, url: &Url) -> bool {
self.allow_insecure_host
@ -440,6 +490,205 @@ impl BaseClient {
}
}
/// Wrapper around [`ClientWithMiddleware`] that manages redirects.
#[derive(Debug, Clone)]
pub struct RedirectClientWithMiddleware {
client: ClientWithMiddleware,
redirect_policy: RedirectPolicy,
}
impl RedirectClientWithMiddleware {
/// Convenience method to make a `GET` request to a URL.
pub fn get<U: IntoUrl>(&self, url: U) -> RequestBuilder {
RequestBuilder::new(self.client.get(url), self)
}
/// Convenience method to make a `POST` request to a URL.
pub fn post<U: IntoUrl>(&self, url: U) -> RequestBuilder {
RequestBuilder::new(self.client.post(url), self)
}
/// Convenience method to make a `HEAD` request to a URL.
pub fn head<U: IntoUrl>(&self, url: U) -> RequestBuilder {
RequestBuilder::new(self.client.head(url), self)
}
/// Executes a request, applying the redirect policy.
pub async fn execute(&self, req: Request) -> reqwest_middleware::Result<Response> {
match self.redirect_policy {
RedirectPolicy::BypassMiddleware => self.client.execute(req).await,
RedirectPolicy::RetriggerMiddleware => self.execute_with_redirect_handling(req).await,
}
}
/// Executes a request. If the response is a redirect (one of HTTP 301, 302, 307, or 308), the
/// request is executed again with the redirect location URL (up to a maximum number of
/// redirects).
///
/// Unlike the built-in reqwest redirect policies, this sends the redirect request through the
/// entire middleware pipeline again.
///
/// See RFC 7231 7.1.2 <https://www.rfc-editor.org/rfc/rfc7231#section-7.1.2> for details on
/// redirect semantics.
async fn execute_with_redirect_handling(
&self,
req: Request,
) -> reqwest_middleware::Result<Response> {
let mut request = req;
let mut redirects = 0;
// This is the default used by reqwest.
let max_redirects = 10;
loop {
let request_url = request.url().clone();
let result = self
.client
.execute(request.try_clone().expect("HTTP request must be cloneable"))
.await;
if redirects == max_redirects {
return result;
}
let Ok(response) = result else {
return result;
};
// Handle redirect if we receive a 301, 302, 307, or 308.
let status = response.status();
if matches!(
status,
StatusCode::MOVED_PERMANENTLY
| StatusCode::FOUND
| StatusCode::TEMPORARY_REDIRECT
| StatusCode::PERMANENT_REDIRECT
) {
let location = response
.headers()
.get("location")
.ok_or(reqwest_middleware::Error::Middleware(anyhow!(
"Missing expected HTTP {status} 'Location' header"
)))?
.to_str()
.map_err(|_| {
reqwest_middleware::Error::Middleware(anyhow!(
"Invalid HTTP {status} 'Location' value: must only contain visible ascii characters"
))
})?;
let mut redirect_url = match Url::parse(location) {
Ok(url) => url,
// Per RFC 7231, URLs should be resolved against the request URL.
Err(ParseError::RelativeUrlWithoutBase) => request_url.join(location).map_err(|err| {
reqwest_middleware::Error::Middleware(anyhow!(
"Invalid HTTP {status} 'Location' value `{location}` relative to `{request_url}`: {err}"
))
})?,
Err(err) => {
return Err(reqwest_middleware::Error::Middleware(anyhow!(
"Invalid HTTP {status} 'Location' value `{location}`: {err}"
)));
}
};
// Ensure the URL is a valid HTTP URI.
if let Err(err) = redirect_url.as_str().parse::<http::Uri>() {
return Err(reqwest_middleware::Error::Middleware(anyhow!(
"Invalid HTTP {status} 'Location' value `{location}`: {err}"
)));
}
// Per RFC 7231, fragments must be propagated
if let Some(fragment) = request_url.fragment() {
redirect_url.set_fragment(Some(fragment));
}
debug!("Received HTTP {status} to {redirect_url}");
*request.url_mut() = redirect_url;
redirects += 1;
continue;
}
return Ok(response);
}
}
pub fn raw_client(&self) -> &ClientWithMiddleware {
&self.client
}
}
impl From<RedirectClientWithMiddleware> for ClientWithMiddleware {
fn from(item: RedirectClientWithMiddleware) -> ClientWithMiddleware {
item.client
}
}
/// A builder to construct the properties of a `Request`.
///
/// This wraps [`reqwest_middleware::RequestBuilder`] to ensure that the [`BaseClient`]
/// redirect policy is respected if `send()` is called.
#[derive(Debug)]
#[must_use]
pub struct RequestBuilder<'a> {
builder: reqwest_middleware::RequestBuilder,
client: &'a RedirectClientWithMiddleware,
}
impl<'a> RequestBuilder<'a> {
pub fn new(
builder: reqwest_middleware::RequestBuilder,
client: &'a RedirectClientWithMiddleware,
) -> Self {
Self { builder, client }
}
/// Add a `Header` to this Request.
pub fn header<K, V>(mut self, key: K, value: V) -> Self
where
HeaderName: TryFrom<K>,
<HeaderName as TryFrom<K>>::Error: Into<http::Error>,
HeaderValue: TryFrom<V>,
<HeaderValue as TryFrom<V>>::Error: Into<http::Error>,
{
self.builder = self.builder.header(key, value);
self
}
/// Add a set of Headers to the existing ones on this Request.
///
/// The headers will be merged in to any already set.
pub fn headers(mut self, headers: HeaderMap) -> Self {
self.builder = self.builder.headers(headers);
self
}
#[cfg(not(target_arch = "wasm32"))]
pub fn version(mut self, version: reqwest::Version) -> Self {
self.builder = self.builder.version(version);
self
}
#[cfg_attr(docsrs, doc(cfg(feature = "multipart")))]
pub fn multipart(mut self, multipart: multipart::Form) -> Self {
self.builder = self.builder.multipart(multipart);
self
}
/// Build a `Request`.
pub fn build(self) -> reqwest::Result<Request> {
self.builder.build()
}
/// Constructs the Request and sends it to the target URL, returning a
/// future Response.
pub async fn send(self) -> reqwest_middleware::Result<Response> {
self.client.execute(self.build()?).await
}
pub fn raw_builder(&self) -> &reqwest_middleware::RequestBuilder {
&self.builder
}
}
/// Extends [`DefaultRetryableStrategy`], to log transient request failures and additional retry cases.
pub struct UvRetryableStrategy;

View file

@ -510,7 +510,6 @@ impl CachedClient {
debug!("Sending revalidation request for: {url}");
let response = self
.0
.for_host(req.url())
.execute(req)
.instrument(info_span!("revalidation_request", url = url.as_str()))
.await
@ -551,7 +550,6 @@ impl CachedClient {
let cache_policy_builder = CachePolicyBuilder::new(&req);
let response = self
.0
.for_host(&url)
.execute(req)
.await
.map_err(|err| ErrorKind::from_reqwest_middleware(url.clone(), err))?

View file

@ -1,6 +1,6 @@
pub use base_client::{
is_extended_transient_error, AuthIntegration, BaseClient, BaseClientBuilder, ExtraMiddleware,
UvRetryableStrategy, DEFAULT_RETRIES,
RedirectClientWithMiddleware, RequestBuilder, UvRetryableStrategy, DEFAULT_RETRIES,
};
pub use cached_client::{CacheControl, CachedClient, CachedClientError, DataWithCachePolicy};
pub use error::{Error, ErrorKind, WrappedReqwestError};

View file

@ -10,7 +10,6 @@ use futures::{FutureExt, StreamExt, TryStreamExt};
use http::HeaderMap;
use itertools::Either;
use reqwest::{Proxy, Response, StatusCode};
use reqwest_middleware::ClientWithMiddleware;
use rustc_hash::FxHashMap;
use tokio::sync::{Mutex, Semaphore};
use tracing::{info_span, instrument, trace, warn, Instrument};
@ -34,7 +33,7 @@ use uv_pypi_types::{ResolutionMetadata, SimpleJson};
use uv_small_str::SmallString;
use uv_torch::TorchStrategy;
use crate::base_client::{BaseClientBuilder, ExtraMiddleware};
use crate::base_client::{BaseClientBuilder, ExtraMiddleware, RedirectPolicy};
use crate::cached_client::CacheControl;
use crate::flat_index::FlatIndexEntry;
use crate::html::SimpleHtml;
@ -42,7 +41,7 @@ use crate::remote_metadata::wheel_metadata_from_remote_zip;
use crate::rkyvutil::OwnedArchive;
use crate::{
BaseClient, CachedClient, CachedClientError, Error, ErrorKind, FlatIndexClient,
FlatIndexEntries,
FlatIndexEntries, RedirectClientWithMiddleware,
};
/// A builder for an [`RegistryClient`].
@ -158,7 +157,9 @@ impl<'a> RegistryClientBuilder<'a> {
pub fn build(self) -> RegistryClient {
// Build a base client
let builder = self.base_client_builder;
let builder = self
.base_client_builder
.redirect(RedirectPolicy::RetriggerMiddleware);
let client = builder.build();
@ -255,7 +256,7 @@ impl RegistryClient {
}
/// Return the [`BaseClient`] used by this client.
pub fn uncached_client(&self, url: &Url) -> &ClientWithMiddleware {
pub fn uncached_client(&self, url: &Url) -> &RedirectClientWithMiddleware {
self.client.uncached().for_host(url)
}
@ -1175,6 +1176,215 @@ mod tests {
use crate::{html::SimpleHtml, SimpleMetadata, SimpleMetadatum};
use uv_cache::Cache;
use wiremock::matchers::{basic_auth, method, path_regex};
use wiremock::{Mock, MockServer, ResponseTemplate};
use crate::RegistryClientBuilder;
type Error = Box<dyn std::error::Error>;
async fn start_test_server(username: &'static str, password: &'static str) -> MockServer {
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(basic_auth(username, password))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(401))
.mount(&server)
.await;
server
}
#[tokio::test]
async fn test_redirect_to_server_with_credentials() -> Result<(), Error> {
let username = "user";
let password = "password";
let auth_server = start_test_server(username, password).await;
let auth_base_url = Url::parse(&auth_server.uri())?;
let redirect_server = MockServer::start().await;
// Configure the redirect server to respond with a 302 to the auth server
Mock::given(method("GET"))
.respond_with(
ResponseTemplate::new(302).insert_header("Location", format!("{}", &auth_base_url)),
)
.mount(&redirect_server)
.await;
let redirect_server_url = Url::parse(&redirect_server.uri())?;
let cache = Cache::temp()?;
let registry_client = RegistryClientBuilder::new(cache).build();
let client = registry_client.cached_client().uncached();
assert_eq!(
client
.for_host(&redirect_server_url)
.get(redirect_server.uri())
.send()
.await?
.status(),
401,
"Requests should fail if credentials are missing"
);
let mut url = redirect_server_url.clone();
let _ = url.set_username(username);
let _ = url.set_password(Some(password));
assert_eq!(
client
.for_host(&redirect_server_url)
.get(format!("{url}"))
.send()
.await?
.status(),
200,
"Requests should succeed if credentials are present"
);
Ok(())
}
#[tokio::test]
async fn test_redirect_root_relative_url() -> Result<(), Error> {
let username = "user";
let password = "password";
let redirect_server = MockServer::start().await;
// Configure the redirect server to respond with a 307 with a relative URL.
Mock::given(method("GET"))
.and(path_regex("/foo/"))
.respond_with(
ResponseTemplate::new(307).insert_header("Location", "/bar/baz/".to_string()),
)
.mount(&redirect_server)
.await;
Mock::given(method("GET"))
.and(path_regex("/bar/baz/"))
.and(basic_auth(username, password))
.respond_with(ResponseTemplate::new(200))
.mount(&redirect_server)
.await;
let redirect_server_url = Url::parse(&redirect_server.uri())?.join("foo/")?;
let cache = Cache::temp()?;
let registry_client = RegistryClientBuilder::new(cache).build();
let client = registry_client.cached_client().uncached();
let mut url = redirect_server_url.clone();
let _ = url.set_username(username);
let _ = url.set_password(Some(password));
assert_eq!(
client
.for_host(&url)
.get(format!("{url}"))
.send()
.await?
.status(),
200,
"Requests should succeed for relative URL"
);
Ok(())
}
#[tokio::test]
async fn test_redirect_relative_url() -> Result<(), Error> {
let username = "user";
let password = "password";
let redirect_server = MockServer::start().await;
// Configure the redirect server to respond with a 307 with a relative URL.
Mock::given(method("GET"))
.and(path_regex("/foo/bar/baz/"))
.and(basic_auth(username, password))
.respond_with(ResponseTemplate::new(200))
.mount(&redirect_server)
.await;
Mock::given(method("GET"))
.and(path_regex("/foo/"))
.respond_with(
ResponseTemplate::new(307).insert_header("Location", "bar/baz/".to_string()),
)
.mount(&redirect_server)
.await;
let cache = Cache::temp()?;
let registry_client = RegistryClientBuilder::new(cache).build();
let client = registry_client.cached_client().uncached();
let redirect_server_url = Url::parse(&redirect_server.uri())?.join("foo/")?;
let mut url = redirect_server_url.clone();
let _ = url.set_username(username);
let _ = url.set_password(Some(password));
assert_eq!(
client
.for_host(&url)
.get(format!("{url}"))
.send()
.await?
.status(),
200,
"Requests should succeed for relative URL"
);
Ok(())
}
#[tokio::test]
async fn test_redirect_preserve_fragment() -> Result<(), Error> {
let redirect_server = MockServer::start().await;
// Configure the redirect server to respond with a 307 with a relative URL.
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(307).insert_header("Location", "/foo".to_string()))
.mount(&redirect_server)
.await;
Mock::given(method("GET"))
.and(path_regex("/foo"))
.respond_with(ResponseTemplate::new(200))
.mount(&redirect_server)
.await;
let cache = Cache::temp()?;
let registry_client = RegistryClientBuilder::new(cache).build();
let client = registry_client.cached_client().uncached();
let mut url = Url::parse(&redirect_server.uri())?;
url.set_fragment(Some("fragment"));
assert_eq!(
client
.for_host(&url)
.get(format!("{}", url.clone()))
.send()
.await?
.url()
.to_string(),
format!("{}/foo#fragment", redirect_server.uri()),
"Requests should preserve fragment"
);
Ok(())
}
#[test]
fn ignore_failing_files() {
// 1.7.7 has an invalid requires-python field (double comma), 1.7.8 is valid