Retry on connection reset network errors (#4960)

See helpful discussion at
https://github.com/seanmonstar/reqwest/issues/1602#issuecomment-1220990725
and https://github.com/astral-sh/uv/issues/3514#issuecomment-2216986250

Should help with #3514 though I'll wait to close until it's confirmed as
we cannot reproduce this.
This commit is contained in:
Zanie Blue 2024-07-10 11:08:47 -04:00 committed by GitHub
parent d497adaacb
commit f5dce1124b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -188,7 +188,7 @@ impl<'a> BaseClientBuilder<'a> {
ExponentialBackoff::builder().build_with_max_retries(self.retries); ExponentialBackoff::builder().build_with_max_retries(self.retries);
let retry_strategy = RetryTransientMiddleware::new_with_policy_and_strategy( let retry_strategy = RetryTransientMiddleware::new_with_policy_and_strategy(
retry_policy, retry_policy,
LoggingRetryableStrategy, UvRetryableStrategy,
); );
let client = client.with(retry_strategy); let client = client.with(retry_strategy);
@ -249,13 +249,20 @@ impl Deref for BaseClient {
} }
} }
/// The same as [`DefaultRetryableStrategy`], but retry attempts on transient request failures are /// Extends [`DefaultRetryableStrategy`], to log transient request failures and additional retry cases.
/// logged, so we can tell whether a request was retried before failing or not. struct UvRetryableStrategy;
struct LoggingRetryableStrategy;
impl RetryableStrategy for LoggingRetryableStrategy { impl RetryableStrategy for UvRetryableStrategy {
fn handle(&self, res: &Result<Response, reqwest_middleware::Error>) -> Option<Retryable> { fn handle(&self, res: &Result<Response, reqwest_middleware::Error>) -> Option<Retryable> {
let retryable = DefaultRetryableStrategy.handle(res); // Use the default strategy and check for additional transient error cases.
let retryable = match DefaultRetryableStrategy.handle(res) {
None | Some(Retryable::Fatal) if is_extended_transient_error(res) => {
Some(Retryable::Transient)
}
default => default,
};
// Log on transient errors
if retryable == Some(Retryable::Transient) { if retryable == Some(Retryable::Transient) {
match res { match res {
Ok(response) => { Ok(response) => {
@ -275,3 +282,35 @@ impl RetryableStrategy for LoggingRetryableStrategy {
retryable retryable
} }
} }
/// Check for additional transient error kinds not supported by the default retry strategy in `reqwest_retry`.
///
/// These cases should be safe to retry with [`Retryable::Transient`].
fn is_extended_transient_error(res: &Result<Response, reqwest_middleware::Error>) -> bool {
// Check for connection reset errors, these are usually `Body` errors which are not retried by default.
if let Err(reqwest_middleware::Error::Reqwest(err)) = res {
if let Some(io) = find_source::<std::io::Error>(&err) {
if io.kind() == std::io::ErrorKind::ConnectionReset {
return true;
}
}
}
false
}
/// Find the first source error of a specific type.
///
/// See <https://github.com/seanmonstar/reqwest/issues/1602#issuecomment-1220996681>
fn find_source<E: std::error::Error + 'static>(orig: &dyn std::error::Error) -> Option<&E> {
let mut cause = orig.source();
while let Some(err) = cause {
if let Some(typed) = err.downcast_ref() {
return Some(typed);
}
cause = err.source();
}
// else
None
}