Mirror of https://github.com/astral-sh/uv.git, synced 2025-07-07 13:25:00 +00:00
Keep track of retries in ManagedPythonDownload::fetch_with_retry
If/when we see https://github.com/astral-sh/uv/issues/14171 again, this should clarify whether our retry logic was skipped (i.e. a transient error wasn't correctly identified as transient), or whether we exhausted our retries.

Previously, if you ran a local example fileserver as in https://github.com/astral-sh/uv/issues/14171#issuecomment-3014580701 and then you tried to install Python from it, you'd get:

```
$ export UV_TEST_NO_CLI_PROGRESS=1
$ uv python install 3.8.20 --mirror http://localhost:8000 2>&1 | cat
error: Failed to install cpython-3.8.20-linux-x86_64-gnu
Caused by: Failed to extract archive: cpython-3.8.20-20241002-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz
Caused by: failed to unpack `/home/jacko/.local/share/uv/python/.temp/.tmpS4sHHZ/python/lib/libpython3.8.so.1.0`
Caused by: failed to unpack `python/lib/libpython3.8.so.1.0` into `/home/jacko/.local/share/uv/python/.temp/.tmpS4sHHZ/python/lib/libpython3.8.so.1.0`
Caused by: error decoding response body
Caused by: request or response body error
Caused by: error reading a body from connection
Caused by: Connection reset by peer (os error 104)
```

With this change you get:

```
error: Failed to install cpython-3.8.20-linux-x86_64-gnu
Caused by: Request failed after 3 retries
Caused by: Failed to extract archive: cpython-3.8.20-20241002-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz
Caused by: failed to unpack `/home/jacko/.local/share/uv/python/.temp/.tmp4Ia24w/python/lib/libpython3.8.so.1.0`
Caused by: failed to unpack `python/lib/libpython3.8.so.1.0` into `/home/jacko/.local/share/uv/python/.temp/.tmp4Ia24w/python/lib/libpython3.8.so.1.0`
Caused by: error decoding response body
Caused by: request or response body error
Caused by: error reading a body from connection
Caused by: Connection reset by peer (os error 104)
```

At the same time, I'm updating the way we handle the retry count to avoid nested retry loops exceeding the intended number of attempts, as I mentioned at https://github.com/astral-sh/uv/issues/14069#issuecomment-3020634281. It's not clear to me whether we actually want this part of the change, and I need feedback here.
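To make the attempts-vs-retries arithmetic concrete, here is a standalone sketch (illustrative numbers and names, not code from this commit) of an outer loop that counts attempts rather than its own retries, so an inner loop's attempts fold in without +1/-1 bookkeeping:

```rust
// Sketch only: an outer retry loop with an overall budget of 3 retries
// (4 attempts), where each outer iteration learns that an inner loop
// (e.g. retrying middleware) already made some attempts of its own.
fn main() {
    let max_retries = 3; // i.e. at most max_retries + 1 = 4 attempts
    let mut total_attempts = 0;

    loop {
        // Pretend the inner loop made 2 attempts before surfacing an error.
        let inner_attempts = 2;
        total_attempts += inner_attempts;

        // Attempts sum directly across nested loops; converting back to
        // "past retries" for the policy check is a single subtraction.
        let n_past_retries = total_attempts - 1;
        if n_past_retries < max_retries {
            continue; // budget remains: retry
        }
        break; // budget exhausted
    }

    // Counting retries per loop independently would allow up to
    // (outer retries + 1) * (inner retries + 1) attempts; counting attempts
    // keeps the total at the intended bound.
    assert_eq!(total_attempts, 4);
}
```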
parent c078683217
commit 85358fe9c6
1 changed file with 64 additions and 20 deletions
```diff
@@ -12,7 +12,7 @@ use futures::TryStreamExt;
 use itertools::Itertools;
 use once_cell::sync::OnceCell;
 use owo_colors::OwoColorize;
-use reqwest_retry::RetryPolicy;
+use reqwest_retry::{RetryError, RetryPolicy};
 use serde::Deserialize;
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWriteExt, BufWriter, ReadBuf};
```
```diff
@@ -111,6 +111,33 @@ pub enum Error {
     },
 }
 
+impl Error {
+    // Return the number of attempts that were made to complete this request before this error was
+    // returned. Note that e.g. 3 retries equates to 4 attempts.
+    //
+    // It's easier to do arithmetic with "attempts" instead of "retries", because if you have
+    // nested retry loops you can just add up all the attempts directly, while adding up the
+    // retries requires +1/-1 adjustments.
+    fn attempts(&self) -> u32 {
+        // Unfortunately different variants of `Error` track retry counts in different ways. We
+        // could consider unifying the variants we handle here in `Error::from_reqwest_middleware`
+        // instead, but both approaches will be fragile as new variants get added over time.
+        if let Error::NetworkErrorWithRetries { retries, .. } = self {
+            return retries + 1;
+        }
+        // TODO(jack): let-chains are stable as of Rust 1.88. We should use them here as soon as
+        // our rust-version is high enough.
+        if let Error::NetworkMiddlewareError(_, anyhow_error) = self {
+            if let Some(RetryError::WithRetries { retries, .. }) =
+                anyhow_error.downcast_ref::<RetryError>()
+            {
+                return retries + 1;
+            }
+        }
+        1
+    }
+}
+
 #[derive(Debug, PartialEq, Eq, Clone, Hash)]
 pub struct ManagedPythonDownload {
     key: PythonInstallationKey,
```
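The mapping from error variants to attempt counts can be illustrated with a minimal standalone analogue (the `DownloadError` type below is a hypothetical stand-in, not uv's real `Error`):

```rust
// Minimal analogue of `Error::attempts` above; `DownloadError` is an
// illustrative stand-in for uv's error type.
enum DownloadError {
    // An error produced after an inner loop already retried `retries` times.
    WithRetries { retries: u32 },
    // Any other error represents a single attempt.
    Other,
}

impl DownloadError {
    fn attempts(&self) -> u32 {
        match self {
            // 3 retries means 4 attempts were made.
            DownloadError::WithRetries { retries } => retries + 1,
            DownloadError::Other => 1,
        }
    }
}

fn main() {
    assert_eq!(DownloadError::WithRetries { retries: 3 }.attempts(), 4);
    assert_eq!(DownloadError::Other.attempts(), 1);
}
```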
```diff
@@ -695,7 +722,8 @@ impl ManagedPythonDownload {
         pypy_install_mirror: Option<&str>,
         reporter: Option<&dyn Reporter>,
     ) -> Result<DownloadResult, Error> {
-        let mut n_past_retries = 0;
+        let mut total_attempts = 0;
+        let mut retried_here = false;
         let start_time = SystemTime::now();
         let retry_policy = client.retry_policy();
         loop {
```
@ -710,25 +738,41 @@ impl ManagedPythonDownload {
|
|||
reporter,
|
||||
)
|
||||
.await;
|
||||
if result
|
||||
.as_ref()
|
||||
.err()
|
||||
.is_some_and(|err| is_extended_transient_error(err))
|
||||
{
|
||||
let retry_decision = retry_policy.should_retry(start_time, n_past_retries);
|
||||
if let reqwest_retry::RetryDecision::Retry { execute_after } = retry_decision {
|
||||
debug!(
|
||||
"Transient failure while handling response for {}; retrying...",
|
||||
self.key()
|
||||
);
|
||||
let duration = execute_after
|
||||
.duration_since(SystemTime::now())
|
||||
.unwrap_or_else(|_| Duration::default());
|
||||
tokio::time::sleep(duration).await;
|
||||
n_past_retries += 1;
|
||||
continue;
|
||||
let result = match result {
|
||||
Ok(download_result) => Ok(download_result),
|
||||
Err(err) => {
|
||||
// Inner retry loops (e.g. `reqwest-retry` middleware) might make more than one
|
||||
// attempt per error we see here.
|
||||
total_attempts += err.attempts();
|
||||
// We currently interpret e.g. "3 retries" to mean we should make 4 attempts.
|
||||
let n_past_retries = total_attempts - 1;
|
||||
if is_extended_transient_error(&err) {
|
||||
let retry_decision = retry_policy.should_retry(start_time, n_past_retries);
|
||||
if let reqwest_retry::RetryDecision::Retry { execute_after } =
|
||||
retry_decision
|
||||
{
|
||||
debug!(
|
||||
"Transient failure while handling response for {}; retrying...",
|
||||
self.key()
|
||||
);
|
||||
let duration = execute_after
|
||||
.duration_since(SystemTime::now())
|
||||
.unwrap_or_else(|_| Duration::default());
|
||||
tokio::time::sleep(duration).await;
|
||||
retried_here = true;
|
||||
continue; // Retry.
|
||||
}
|
||||
}
|
||||
if retried_here {
|
||||
Err(Error::NetworkErrorWithRetries {
|
||||
err: Box::new(err),
|
||||
retries: n_past_retries,
|
||||
})
|
||||
} else {
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
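For reference, the policy interaction in this loop (consult `should_retry`, sleep until `execute_after`, count another past retry) can be sketched against reqwest-retry's public API. This is a self-contained sketch with an arbitrary locally constructed `ExponentialBackoff` policy; uv instead obtains its policy from the client via `client.retry_policy()`:

```rust
use std::time::{Duration, SystemTime};

use reqwest_retry::policies::ExponentialBackoff;
use reqwest_retry::{RetryDecision, RetryPolicy};

fn main() {
    // Illustrative policy with tight retry bounds so the sketch runs quickly.
    let policy = ExponentialBackoff::builder()
        .retry_bounds(Duration::from_millis(10), Duration::from_millis(50))
        .build_with_max_retries(3);
    let start_time = SystemTime::now();

    let mut n_past_retries = 0;
    // Pretend every attempt fails with a transient error.
    while let RetryDecision::Retry { execute_after } =
        policy.should_retry(start_time, n_past_retries)
    {
        // Sleep until the scheduled time, clamping to zero if `execute_after`
        // is already in the past (the same dance as in the diff above).
        let wait = execute_after
            .duration_since(SystemTime::now())
            .unwrap_or_else(|_| Duration::default());
        std::thread::sleep(wait);
        n_past_retries += 1;
    }

    // With a budget of 3 retries, the policy declines the 4th consultation.
    assert_eq!(n_past_retries, 3);
}
```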