Mirror of https://github.com/astral-sh/uv.git (synced 2025-07-07 21:35:00 +00:00)
Keep track of retries in ManagedPythonDownload::fetch_with_retry
If/when we see https://github.com/astral-sh/uv/issues/14171 again, this should clarify whether our retry logic was skipped (i.e. a transient error wasn't correctly identified as transient), or whether we exhausted our retries.

Previously, if you ran a local example fileserver as in https://github.com/astral-sh/uv/issues/14171#issuecomment-3014580701 and then you tried to install Python from it, you'd get:

```
$ export UV_TEST_NO_CLI_PROGRESS=1
$ uv python install 3.8.20 --mirror http://localhost:8000 2>&1 | cat
error: Failed to install cpython-3.8.20-linux-x86_64-gnu
  Caused by: Failed to extract archive: cpython-3.8.20-20241002-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz
  Caused by: failed to unpack `/home/jacko/.local/share/uv/python/.temp/.tmpS4sHHZ/python/lib/libpython3.8.so.1.0`
  Caused by: failed to unpack `python/lib/libpython3.8.so.1.0` into `/home/jacko/.local/share/uv/python/.temp/.tmpS4sHHZ/python/lib/libpython3.8.so.1.0`
  Caused by: error decoding response body
  Caused by: request or response body error
  Caused by: error reading a body from connection
  Caused by: Connection reset by peer (os error 104)
```

With this change you get:

```
error: Failed to install cpython-3.8.20-linux-x86_64-gnu
  Caused by: Request failed after 3 retries
  Caused by: Failed to extract archive: cpython-3.8.20-20241002-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz
  Caused by: failed to unpack `/home/jacko/.local/share/uv/python/.temp/.tmp4Ia24w/python/lib/libpython3.8.so.1.0`
  Caused by: failed to unpack `python/lib/libpython3.8.so.1.0` into `/home/jacko/.local/share/uv/python/.temp/.tmp4Ia24w/python/lib/libpython3.8.so.1.0`
  Caused by: error decoding response body
  Caused by: request or response body error
  Caused by: error reading a body from connection
  Caused by: Connection reset by peer (os error 104)
```

At the same time, I'm updating the way we handle the retry count to avoid nested retry loops exceeding the intended number of attempts, as I mentioned at https://github.com/astral-sh/uv/issues/14069#issuecomment-3020634281. It's not clear to me whether we actually want this part of the change, and I need feedback here.
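As a rough illustration of the attempts-vs-retries accounting (a minimal standalone sketch, not uv's code; `MAX_ATTEMPTS` and the inner-loop attempt count are made-up numbers), summing the attempts reported by an inner retry loop into the outer loop's budget keeps the total number of requests bounded, instead of letting each outer retry multiply the inner retries:

```rust
fn main() {
    // Hypothetical budget: "3 retries" is interpreted as 4 total attempts.
    const MAX_ATTEMPTS: u32 = 4;

    let mut total_attempts = 0;
    let mut outer_iterations = 0;
    loop {
        outer_iterations += 1;

        // Pretend an inner retry loop (like the `reqwest-retry` middleware)
        // made two attempts before surfacing an error to this loop.
        let attempts_reported_by_inner_loop = 2;
        total_attempts += attempts_reported_by_inner_loop;

        // Summing *attempts* from both loops lets us compare directly against
        // the overall budget; summing *retries* would need +1/-1 adjustments
        // at every level of nesting.
        if total_attempts >= MAX_ATTEMPTS {
            println!(
                "giving up after {total_attempts} attempts ({outer_iterations} outer iterations)"
            );
            break;
        }
    }
}
```

With these made-up numbers, the sketch stops after two outer iterations (four attempts in total), whereas counting only outer retries against the budget would allow four outer iterations of two inner attempts each.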
This commit is contained in:
parent c078683217
commit 85358fe9c6

1 changed file with 64 additions and 20 deletions
```diff
@@ -12,7 +12,7 @@ use futures::TryStreamExt;
 use itertools::Itertools;
 use once_cell::sync::OnceCell;
 use owo_colors::OwoColorize;
-use reqwest_retry::RetryPolicy;
+use reqwest_retry::{RetryError, RetryPolicy};
 use serde::Deserialize;
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWriteExt, BufWriter, ReadBuf};
@@ -111,6 +111,33 @@ pub enum Error {
     },
 }
 
+impl Error {
+    // Return the number of attempts that were made to complete this request before this error was
+    // returned. Note that e.g. 3 retries equates to 4 attempts.
+    //
+    // It's easier to do arithmetic with "attempts" instead of "retries", because if you have
+    // nested retry loops you can just add up all the attempts directly, while adding up the
+    // retries requires +1/-1 adjustments.
+    fn attempts(&self) -> u32 {
+        // Unfortunately different variants of `Error` track retry counts in different ways. We
+        // could consider unifying the variants we handle here in `Error::from_reqwest_middleware`
+        // instead, but both approaches will be fragile as new variants get added over time.
+        if let Error::NetworkErrorWithRetries { retries, .. } = self {
+            return retries + 1;
+        }
+        // TODO(jack): let-chains are stable as of Rust 1.88. We should use them here as soon as
+        // our rust-version is high enough.
+        if let Error::NetworkMiddlewareError(_, anyhow_error) = self {
+            if let Some(RetryError::WithRetries { retries, .. }) =
+                anyhow_error.downcast_ref::<RetryError>()
+            {
+                return retries + 1;
+            }
+        }
+        1
+    }
+}
+
 #[derive(Debug, PartialEq, Eq, Clone, Hash)]
 pub struct ManagedPythonDownload {
     key: PythonInstallationKey,
@@ -695,7 +722,8 @@ impl ManagedPythonDownload {
         pypy_install_mirror: Option<&str>,
         reporter: Option<&dyn Reporter>,
     ) -> Result<DownloadResult, Error> {
-        let mut n_past_retries = 0;
+        let mut total_attempts = 0;
+        let mut retried_here = false;
         let start_time = SystemTime::now();
         let retry_policy = client.retry_policy();
         loop {
@@ -710,25 +738,41 @@
                     reporter,
                 )
                 .await;
-            if result
-                .as_ref()
-                .err()
-                .is_some_and(|err| is_extended_transient_error(err))
-            {
-                let retry_decision = retry_policy.should_retry(start_time, n_past_retries);
-                if let reqwest_retry::RetryDecision::Retry { execute_after } = retry_decision {
-                    debug!(
-                        "Transient failure while handling response for {}; retrying...",
-                        self.key()
-                    );
-                    let duration = execute_after
-                        .duration_since(SystemTime::now())
-                        .unwrap_or_else(|_| Duration::default());
-                    tokio::time::sleep(duration).await;
-                    n_past_retries += 1;
-                    continue;
-                }
-            }
+            let result = match result {
+                Ok(download_result) => Ok(download_result),
+                Err(err) => {
+                    // Inner retry loops (e.g. `reqwest-retry` middleware) might make more than one
+                    // attempt per error we see here.
+                    total_attempts += err.attempts();
+                    // We currently interpret e.g. "3 retries" to mean we should make 4 attempts.
+                    let n_past_retries = total_attempts - 1;
+                    if is_extended_transient_error(&err) {
+                        let retry_decision = retry_policy.should_retry(start_time, n_past_retries);
+                        if let reqwest_retry::RetryDecision::Retry { execute_after } =
+                            retry_decision
+                        {
+                            debug!(
+                                "Transient failure while handling response for {}; retrying...",
+                                self.key()
+                            );
+                            let duration = execute_after
+                                .duration_since(SystemTime::now())
+                                .unwrap_or_else(|_| Duration::default());
+                            tokio::time::sleep(duration).await;
+                            retried_here = true;
+                            continue; // Retry.
+                        }
+                    }
+                    if retried_here {
+                        Err(Error::NetworkErrorWithRetries {
+                            err: Box::new(err),
+                            retries: n_past_retries,
+                        })
+                    } else {
+                        Err(err)
+                    }
+                }
+            };
             return result;
         }
     }
```