From c12e8bb34376e3d13eb0560344785094923e8846 Mon Sep 17 00:00:00 2001 From: Mark Dodgson Date: Thu, 16 Oct 2025 21:53:49 +0200 Subject: [PATCH] Implement RFC9457 compliant messaging (#16199) ## Summary HTTP1.1 [RFC 9112 - HTTP/1.1](https://www.rfc-editor.org/rfc/rfc9112.html#name-status-line) section 4 defines the response status code to optionally include a text description (human readable) of the reason for the status code. [RFC9113 - HTTP/2](https://www.rfc-editor.org/rfc/rfc9113) is the HTTP2 protocol standard and the response status only considers the [status code](https://www.rfc-editor.org/rfc/rfc9113#name-response-pseudo-header-fiel) and not the reason phrase, and as such important information can be lost in helping the client determine a route cause of a failure. As per discussion on this [PR](https://github.com/astral-sh/uv/pull/15979) the current feeling is that implementing the RFC9457 standard might be the preferred route. This PR makes those changes to aid the discussion which has also been moved to the [PEP board](https://discuss.python.org/t/block-download-of-components-when-violating-policy/104021/1) ## Test Plan Pulling components that violate our policy over HTTP2 and without any RFC9457 implementation the following message is presented to the user: image With the RFC9457 standard implemented, below you can see the advantage in the extra context as to why the component has been blocked: image --------- Co-authored-by: konstin --- crates/uv-client/src/cached_client.rs | 64 +++++++- crates/uv-client/src/error.rs | 219 ++++++++++++++++++++++++-- crates/uv/tests/it/network.rs | 49 ++++++ 3 files changed, 319 insertions(+), 13 deletions(-) diff --git a/crates/uv-client/src/cached_client.rs b/crates/uv-client/src/cached_client.rs index 0f32aa51f..6c31a6515 100644 --- a/crates/uv-client/src/cached_client.rs +++ b/crates/uv-client/src/cached_client.rs @@ -15,12 +15,32 @@ use uv_redacted::DisplaySafeUrl; use crate::BaseClient; use crate::base_client::is_transient_network_error; +use crate::error::ProblemDetails; use crate::{ Error, ErrorKind, httpcache::{AfterResponse, BeforeRequest, CachePolicy, CachePolicyBuilder}, rkyvutil::OwnedArchive, }; +/// Extract problem details from an HTTP response if it has the correct content type +/// +/// Note: This consumes the response body, so it should only be called when there's an error status. +async fn extract_problem_details(response: Response) -> Option { + match response.bytes().await { + Ok(bytes) => match serde_json::from_slice(&bytes) { + Ok(details) => Some(details), + Err(err) => { + warn!("Failed to parse problem details: {err}"); + None + } + }, + Err(err) => { + warn!("Failed to read response body for problem details: {err}"); + None + } + } +} + /// A trait the generalizes (de)serialization at a high level. /// /// The main purpose of this trait is to make the `CachedClient` work for @@ -544,9 +564,29 @@ impl CachedClient { .execute(req) .instrument(info_span!("revalidation_request", url = url.as_str())) .await - .map_err(|err| ErrorKind::from_reqwest_middleware(url.clone(), err))? - .error_for_status() - .map_err(|err| ErrorKind::from_reqwest(url.clone(), err))?; + .map_err(|err| ErrorKind::from_reqwest_middleware(url.clone(), err))?; + + // Check for HTTP error status and extract problem details if available + if let Err(status_error) = response.error_for_status_ref() { + // Clone the response to extract problem details before the error consumes it + let problem_details = if response + .headers() + .get("content-type") + .and_then(|ct| ct.to_str().ok()) + .map(|ct| ct == "application/problem+json") + .unwrap_or(false) + { + extract_problem_details(response).await + } else { + None + }; + return Err(ErrorKind::from_reqwest_with_problem_details( + url.clone(), + status_error, + problem_details, + ) + .into()); + } // If the user set a custom `Cache-Control` header, override it. if let CacheControl::Override(header) = cache_control { @@ -611,9 +651,25 @@ impl CachedClient { .map(|retries| retries.value()); if let Err(status_error) = response.error_for_status_ref() { + let problem_details = if response + .headers() + .get("content-type") + .and_then(|ct| ct.to_str().ok()) + .map(|ct| ct.starts_with("application/problem+json")) + .unwrap_or(false) + { + extract_problem_details(response).await + } else { + None + }; return Err(CachedClientError::::Client { retries: retry_count, - err: ErrorKind::from_reqwest(url, status_error).into(), + err: ErrorKind::from_reqwest_with_problem_details( + url, + status_error, + problem_details, + ) + .into(), } .into()); } diff --git a/crates/uv-client/src/error.rs b/crates/uv-client/src/error.rs index bd3df6d51..1b802fb32 100644 --- a/crates/uv-client/src/error.rs +++ b/crates/uv-client/src/error.rs @@ -3,6 +3,7 @@ use std::ops::Deref; use async_http_range_reader::AsyncHttpRangeReaderError; use async_zip::error::ZipError; +use serde::Deserialize; use uv_distribution_filename::{WheelFilename, WheelFilenameError}; use uv_normalize::PackageName; @@ -11,6 +12,61 @@ use uv_redacted::DisplaySafeUrl; use crate::middleware::OfflineError; use crate::{FlatIndexError, html}; +/// RFC 9457 Problem Details for HTTP APIs +/// +/// This structure represents the standard format for machine-readable details +/// of errors in HTTP response bodies as defined in RFC 9457. +#[derive(Debug, Clone, Deserialize)] +pub struct ProblemDetails { + /// A URI reference that identifies the problem type. + /// When dereferenced, it SHOULD provide human-readable documentation for the problem type. + #[serde(rename = "type", default = "default_problem_type")] + pub problem_type: String, + + /// A short, human-readable summary of the problem type. + pub title: Option, + + /// The HTTP status code generated by the origin server for this occurrence of the problem. + pub status: Option, + + /// A human-readable explanation specific to this occurrence of the problem. + pub detail: Option, + + /// A URI reference that identifies the specific occurrence of the problem. + pub instance: Option, +} + +/// Default problem type URI as per RFC 9457 +#[inline] +fn default_problem_type() -> String { + "about:blank".to_string() +} + +impl ProblemDetails { + /// Get a human-readable description of the problem + pub fn description(&self) -> Option { + match self { + Self { + title: Some(title), + detail: Some(detail), + .. + } => Some(format!("Server message: {title}, {detail}")), + Self { + title: Some(title), .. + } => Some(format!("Server message: {title}")), + Self { + detail: Some(detail), + .. + } => Some(format!("Server message: {detail}")), + Self { + status: Some(status), + .. + } => Some(format!("HTTP error {status}")), + _ => None, + } + } +} + #[derive(Debug)] pub struct Error { kind: Box, @@ -330,7 +386,19 @@ impl ErrorKind { } } - Self::WrappedReqwestError(url, WrappedReqwestError(err)) + Self::WrappedReqwestError(url, WrappedReqwestError::from(err)) + } + + /// Create an [`ErrorKind`] from a [`reqwest::Error`] with problem details. + pub(crate) fn from_reqwest_with_problem_details( + url: DisplaySafeUrl, + error: reqwest::Error, + problem_details: Option, + ) -> Self { + Self::WrappedReqwestError( + url, + WrappedReqwestError::with_problem_details(error.into(), problem_details), + ) } } @@ -340,12 +408,26 @@ impl ErrorKind { /// Wraps a [`reqwest_middleware::Error`] instead of an [`reqwest::Error`] since the actual reqwest /// error may be below some context in the [`anyhow::Error`]. #[derive(Debug)] -pub struct WrappedReqwestError(reqwest_middleware::Error); +pub struct WrappedReqwestError { + error: reqwest_middleware::Error, + problem_details: Option>, +} impl WrappedReqwestError { + /// Create a new `WrappedReqwestError` with optional problem details + pub fn with_problem_details( + error: reqwest_middleware::Error, + problem_details: Option, + ) -> Self { + Self { + error, + problem_details: problem_details.map(Box::new), + } + } + /// Return the inner [`reqwest::Error`] from the error chain, if it exists. fn inner(&self) -> Option<&reqwest::Error> { - match &self.0 { + match &self.error { reqwest_middleware::Error::Reqwest(err) => Some(err), reqwest_middleware::Error::Middleware(err) => err.chain().find_map(|err| { if let Some(err) = err.downcast_ref::() { @@ -407,13 +489,19 @@ impl WrappedReqwestError { impl From for WrappedReqwestError { fn from(error: reqwest::Error) -> Self { - Self(error.into()) + Self { + error: error.into(), + problem_details: None, + } } } impl From for WrappedReqwestError { fn from(error: reqwest_middleware::Error) -> Self { - Self(error) + Self { + error, + problem_details: None, + } } } @@ -421,7 +509,7 @@ impl Deref for WrappedReqwestError { type Target = reqwest_middleware::Error; fn deref(&self) -> &Self::Target { - &self.0 + &self.error } } @@ -430,9 +518,15 @@ impl Display for WrappedReqwestError { if self.is_likely_offline() { // Insert an extra hint, we'll show the wrapped error through `source` f.write_str("Could not connect, are you offline?") + } else if let Some(problem_details) = &self.problem_details { + // Show problem details if available + match problem_details.description() { + None => Display::fmt(&self.error, f), + Some(message) => f.write_str(&message), + } } else { // Show the wrapped error - Display::fmt(&self.0, f) + Display::fmt(&self.error, f) } } } @@ -441,10 +535,117 @@ impl std::error::Error for WrappedReqwestError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { if self.is_likely_offline() { // `Display` is inserting an extra message, so we need to show the wrapped error - Some(&self.0) + Some(&self.error) + } else if self.problem_details.is_some() { + // `Display` is showing problem details, so show the wrapped error as source + Some(&self.error) } else { // `Display` is showing the wrapped error, continue with its source - self.0.source() + self.error.source() } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_problem_details_parsing() { + let json = r#"{ + "type": "https://example.com/probs/out-of-credit", + "title": "You do not have enough credit.", + "detail": "Your current balance is 30, but that costs 50.", + "status": 403, + "instance": "/account/12345/msgs/abc" + }"#; + + let problem_details: ProblemDetails = serde_json::from_slice(json.as_bytes()).unwrap(); + assert_eq!( + problem_details.problem_type, + "https://example.com/probs/out-of-credit" + ); + assert_eq!( + problem_details.title, + Some("You do not have enough credit.".to_string()) + ); + assert_eq!( + problem_details.detail, + Some("Your current balance is 30, but that costs 50.".to_string()) + ); + assert_eq!(problem_details.status, Some(403)); + assert_eq!( + problem_details.instance, + Some("/account/12345/msgs/abc".to_string()) + ); + } + + #[test] + fn test_problem_details_default_type() { + let json = r#"{ + "detail": "Something went wrong", + "status": 500 + }"#; + + let problem_details: ProblemDetails = serde_json::from_slice(json.as_bytes()).unwrap(); + assert_eq!(problem_details.problem_type, "about:blank"); + assert_eq!( + problem_details.detail, + Some("Something went wrong".to_string()) + ); + assert_eq!(problem_details.status, Some(500)); + } + + #[test] + fn test_problem_details_description() { + let json = r#"{ + "detail": "Detailed error message", + "title": "Error Title", + "status": 400 + }"#; + + let problem_details: ProblemDetails = serde_json::from_slice(json.as_bytes()).unwrap(); + assert_eq!( + problem_details.description().unwrap(), + "Server message: Error Title, Detailed error message" + ); + + let json_no_detail = r#"{ + "title": "Error Title", + "status": 400 + }"#; + + let problem_details: ProblemDetails = + serde_json::from_slice(json_no_detail.as_bytes()).unwrap(); + assert_eq!( + problem_details.description().unwrap(), + "Server message: Error Title" + ); + + let json_minimal = r#"{ + "status": 400 + }"#; + + let problem_details: ProblemDetails = + serde_json::from_slice(json_minimal.as_bytes()).unwrap(); + assert_eq!(problem_details.description().unwrap(), "HTTP error 400"); + } + + #[test] + fn test_problem_details_with_extensions() { + let json = r#"{ + "type": "https://example.com/probs/out-of-credit", + "title": "You do not have enough credit.", + "detail": "Your current balance is 30, but that costs 50.", + "status": 403, + "balance": 30, + "accounts": ["/account/12345", "/account/67890"] + }"#; + + let problem_details: ProblemDetails = serde_json::from_slice(json.as_bytes()).unwrap(); + assert_eq!( + problem_details.title, + Some("You do not have enough credit.".to_string()) + ); + } +} diff --git a/crates/uv/tests/it/network.rs b/crates/uv/tests/it/network.rs index 565fc0104..f541528bc 100644 --- a/crates/uv/tests/it/network.rs +++ b/crates/uv/tests/it/network.rs @@ -360,3 +360,52 @@ async fn install_http_retries() { " ); } + +/// Test problem details with a 403 error containing license compliance information +#[tokio::test] +async fn rfc9457_problem_details_license_violation() { + let context = TestContext::new("3.12"); + + let server = MockServer::start().await; + + let problem_json = r#"{ + "type": "https://example.com/probs/license-violation", + "title": "License Compliance Issue", + "status": 403, + "detail": "This package version has a license that violates organizational policy." + }"#; + + // Mock HEAD request to return 200 OK + Mock::given(method("HEAD")) + .respond_with(ResponseTemplate::new(StatusCode::OK)) + .mount(&server) + .await; + + // Mock GET request to return 403 with problem details + Mock::given(method("GET")) + .respond_with( + ResponseTemplate::new(StatusCode::FORBIDDEN) + .set_body_raw(problem_json, "application/problem+json"), + ) + .mount(&server) + .await; + + let mock_server_uri = server.uri(); + let tqdm_url = format!("{mock_server_uri}/packages/tqdm-4.67.1-py3-none-any.whl"); + + let filters = vec![(mock_server_uri.as_str(), "[SERVER]")]; + uv_snapshot!(filters, context + .pip_install() + .arg(format!("tqdm @ {tqdm_url}")) + .env_remove(EnvVars::UV_HTTP_RETRIES), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + × Failed to download `tqdm @ [SERVER]/packages/tqdm-4.67.1-py3-none-any.whl` + ├─▶ Failed to fetch: `[SERVER]/packages/tqdm-4.67.1-py3-none-any.whl` + ├─▶ Server message: License Compliance Issue, This package version has a license that violates organizational policy. + ╰─▶ HTTP status client error (403 Forbidden) for url ([SERVER]/packages/tqdm-4.67.1-py3-none-any.whl) + "); +}