diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs index 860db6d09..ab0b43123 100644 --- a/crates/uv-cache/src/lib.rs +++ b/crates/uv-cache/src/lib.rs @@ -1116,19 +1116,7 @@ impl CacheBucket { let root = cache.bucket(self); summary += rm_rf(root)?; } - Self::Git => { - // Nothing to do. - } - Self::Interpreter => { - // Nothing to do. - } - Self::Archive => { - // Nothing to do. - } - Self::Builds => { - // Nothing to do. - } - Self::Environments => { + Self::Git | Self::Interpreter | Self::Archive | Self::Builds | Self::Environments => { // Nothing to do. } } diff --git a/crates/uv-python/src/downloads.rs b/crates/uv-python/src/downloads.rs index 95fe84995..8683a0526 100644 --- a/crates/uv-python/src/downloads.rs +++ b/crates/uv-python/src/downloads.rs @@ -1,12 +1,12 @@ use std::borrow::Cow; use std::collections::HashMap; use std::fmt::Display; -use std::io; use std::path::{Path, PathBuf}; use std::pin::Pin; use std::str::FromStr; use std::task::{Context, Poll}; use std::time::{Duration, SystemTime}; +use std::{env, io}; use futures::TryStreamExt; use itertools::Itertools; @@ -15,13 +15,13 @@ use owo_colors::OwoColorize; use reqwest_retry::RetryPolicy; use serde::Deserialize; use thiserror::Error; -use tokio::io::{AsyncRead, ReadBuf}; +use tokio::io::{AsyncRead, AsyncWriteExt, BufWriter, ReadBuf}; use tokio_util::compat::FuturesAsyncReadCompatExt; use tokio_util::either::Either; use tracing::{debug, instrument}; use url::Url; -use uv_client::{is_extended_transient_error, WrappedReqwestError}; +use uv_client::{is_extended_transient_error, BaseClient, WrappedReqwestError}; use uv_distribution_filename::{ExtensionError, SourceDistExtension}; use uv_extract::hash::Hasher; use uv_fs::{rename_with_retry, Simplified}; @@ -96,6 +96,12 @@ pub enum Error { RemoteJSONNotSupported(), #[error("The json of the python downloads is invalid: {0}")] InvalidPythonDownloadsJSON(String, #[source] serde_json::Error), + #[error("An offline Python installation was requested, but {file} (from {url}) is missing in {}", python_builds_dir.user_display())] + OfflinePythonMissing { + file: Box, + url: Box, + python_builds_dir: PathBuf, + }, } #[derive(Debug, PartialEq, Clone)] @@ -509,6 +515,7 @@ impl ManagedPythonDownload { Err(Error::NoDownloadFound(request.clone())) } + //noinspection RsUnresolvedPath - RustRover can't see through the `include!` /// Iterate over all [`ManagedPythonDownload`]s. pub fn iter_all() -> Result, Error> { let runtime_source = std::env::var(EnvVars::UV_PYTHON_DOWNLOADS_JSON_URL); @@ -560,7 +567,7 @@ impl ManagedPythonDownload { #[instrument(skip(client, installation_dir, scratch_dir, reporter), fields(download = % self.key()))] pub async fn fetch_with_retry( &self, - client: &uv_client::BaseClient, + client: &BaseClient, installation_dir: &Path, scratch_dir: &Path, reinstall: bool, @@ -610,7 +617,7 @@ impl ManagedPythonDownload { #[instrument(skip(client, installation_dir, scratch_dir, reporter), fields(download = % self.key()))] pub async fn fetch( &self, - client: &uv_client::BaseClient, + client: &BaseClient, installation_dir: &Path, scratch_dir: &Path, reinstall: bool, @@ -626,62 +633,109 @@ impl ManagedPythonDownload { return Ok(DownloadResult::AlreadyAvailable(path)); } - let filename = url.path_segments().unwrap().next_back().unwrap(); - let ext = SourceDistExtension::from_path(filename) + // We improve filesystem compatibility by using neither the URL-encoded `%2B` nor the `+` it + // decodes to. + let filename = url + .path_segments() + .unwrap() + .next_back() + .unwrap() + .replace("%2B", "-"); + debug_assert!( + filename + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.'), + "Unexpected char in filename: {filename}" + ); + let ext = SourceDistExtension::from_path(&filename) .map_err(|err| Error::MissingExtension(url.to_string(), err))?; - let (reader, size) = read_url(&url, client).await?; - let progress = reporter - .as_ref() - .map(|reporter| (reporter, reporter.on_download_start(&self.key, size))); - - // Download and extract into a temporary directory. let temp_dir = tempfile::tempdir_in(scratch_dir).map_err(Error::DownloadDirError)?; - debug!( - "Downloading {url} to temporary location: {}", - temp_dir.path().simplified_display() - ); + if let Some(python_builds_dir) = env::var_os(EnvVars::UV_PYTHON_CACHE_DIR) { + let python_builds_dir = PathBuf::from(python_builds_dir); + fs_err::create_dir_all(&python_builds_dir)?; + let hash_prefix = match self.sha256 { + Some(sha) => { + // Shorten the hash to avoid too-long-filename errors + &sha[..9] + } + None => "none", + }; + let target_cache_file = python_builds_dir.join(format!("{hash_prefix}-{filename}")); - let mut hashers = self - .sha256 - .into_iter() - .map(|_| Hasher::from(HashAlgorithm::Sha256)) - .collect::>(); - let mut hasher = uv_extract::hash::HashReader::new(reader, &mut hashers); + // Download the archive to the cache, or return a reader if we have it in cache. + // TODO(konsti): We should "tee" the write so we can do the download-to-cache and unpacking + // in one step. + let (reader, size): (Box, Option) = + match fs_err::tokio::File::open(&target_cache_file).await { + Ok(file) => { + debug!( + "Extracting existing `{}`", + target_cache_file.simplified_display() + ); + let size = file.metadata().await?.len(); + let reader = Box::new(tokio::io::BufReader::new(file)); + (reader, Some(size)) + } + Err(err) if err.kind() == io::ErrorKind::NotFound => { + // Point the user to which file is missing where and where to download it + if client.connectivity().is_offline() { + return Err(Error::OfflinePythonMissing { + file: Box::new(self.key().clone()), + url: Box::new(url), + python_builds_dir, + }); + } - debug!("Extracting {filename}"); + self.download_archive( + &url, + client, + reporter, + &python_builds_dir, + &target_cache_file, + ) + .await?; - match progress { - Some((&reporter, progress)) => { - let mut reader = ProgressReader::new(&mut hasher, progress, reporter); - uv_extract::stream::archive(&mut reader, ext, temp_dir.path()) - .await - .map_err(|err| Error::ExtractError(filename.to_string(), err))?; - } - None => { - uv_extract::stream::archive(&mut hasher, ext, temp_dir.path()) - .await - .map_err(|err| Error::ExtractError(filename.to_string(), err))?; - } - } + debug!("Extracting `{}`", target_cache_file.simplified_display()); + let file = fs_err::tokio::File::open(&target_cache_file).await?; + let size = file.metadata().await?.len(); + let reader = Box::new(tokio::io::BufReader::new(file)); + (reader, Some(size)) + } + Err(err) => return Err(err.into()), + }; - hasher.finish().await.map_err(Error::HashExhaustion)?; + // Extract the downloaded archive into a temporary directory. + self.extract_reader( + reader, + temp_dir.path(), + &filename, + ext, + size, + reporter, + Direction::Extract, + ) + .await?; + } else { + // Avoid overlong log lines + debug!("Downloading {url}"); + debug!( + "Extracting {filename} to temporary location: {}", + temp_dir.path().simplified_display() + ); - if let Some((&reporter, progress)) = progress { - reporter.on_progress(&self.key, progress); - } - - // Check the hash - if let Some(expected) = self.sha256 { - let actual = HashDigest::from(hashers.pop().unwrap()).digest; - if !actual.eq_ignore_ascii_case(expected) { - return Err(Error::HashMismatch { - installation: self.key.to_string(), - expected: expected.to_string(), - actual: actual.to_string(), - }); - } + let (reader, size) = read_url(&url, client).await?; + self.extract_reader( + reader, + temp_dir.path(), + &filename, + ext, + size, + reporter, + Direction::Download, + ) + .await?; } // Extract the top-level directory. @@ -729,6 +783,97 @@ impl ManagedPythonDownload { Ok(DownloadResult::Fetched(path)) } + /// Download the managed Python archive into the cache directory. + async fn download_archive( + &self, + url: &Url, + client: &BaseClient, + reporter: Option<&dyn Reporter>, + python_builds_dir: &Path, + target_cache_file: &Path, + ) -> Result<(), Error> { + debug!( + "Downloading {} to `{}`", + url, + target_cache_file.simplified_display() + ); + + let (mut reader, size) = read_url(url, client).await?; + let temp_dir = tempfile::tempdir_in(python_builds_dir)?; + let temp_file = temp_dir.path().join("download"); + + // Download to a temporary file. We verify the hash when unpacking the file. + { + let mut archive_writer = BufWriter::new(fs_err::tokio::File::create(&temp_file).await?); + + // Download with or without progress bar. + if let Some(reporter) = reporter { + let key = reporter.on_request_start(Direction::Download, &self.key, size); + tokio::io::copy( + &mut ProgressReader::new(reader, key, reporter), + &mut archive_writer, + ) + .await?; + reporter.on_request_complete(Direction::Download, key); + } else { + tokio::io::copy(&mut reader, &mut archive_writer).await?; + } + + archive_writer.flush().await?; + } + // Move the completed file into place, invalidating the `File` instance. + fs_err::rename(&temp_file, target_cache_file)?; + Ok(()) + } + + /// Extract a Python interpreter archive into a (temporary) directory, either from a file or + /// from a download stream. + async fn extract_reader( + &self, + reader: impl AsyncRead + Unpin, + target: &Path, + filename: &String, + ext: SourceDistExtension, + size: Option, + reporter: Option<&dyn Reporter>, + direction: Direction, + ) -> Result<(), Error> { + let mut hashers = self + .sha256 + .into_iter() + .map(|_| Hasher::from(HashAlgorithm::Sha256)) + .collect::>(); + let mut hasher = uv_extract::hash::HashReader::new(reader, &mut hashers); + + if let Some(reporter) = reporter { + let progress_key = reporter.on_request_start(direction, &self.key, size); + let mut reader = ProgressReader::new(&mut hasher, progress_key, reporter); + uv_extract::stream::archive(&mut reader, ext, target) + .await + .map_err(|err| Error::ExtractError(filename.to_string(), err))?; + reporter.on_request_complete(direction, progress_key); + } else { + uv_extract::stream::archive(&mut hasher, ext, target) + .await + .map_err(|err| Error::ExtractError(filename.to_string(), err))?; + } + hasher.finish().await.map_err(Error::HashExhaustion)?; + + // Check the hash + if let Some(expected) = self.sha256 { + let actual = HashDigest::from(hashers.pop().unwrap()).digest; + if !actual.eq_ignore_ascii_case(expected) { + return Err(Error::HashMismatch { + installation: self.key.to_string(), + expected: expected.to_string(), + actual: actual.to_string(), + }); + } + } + + Ok(()) + } + pub fn python_version(&self) -> PythonVersion { self.key.version() } @@ -905,11 +1050,36 @@ impl Display for ManagedPythonDownload { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Direction { + Download, + Extract, +} + +impl Direction { + fn as_str(&self) -> &str { + match self { + Direction::Download => "download", + Direction::Extract => "extract", + } + } +} + +impl Display for Direction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + pub trait Reporter: Send + Sync { - fn on_progress(&self, name: &PythonInstallationKey, id: usize); - fn on_download_start(&self, name: &PythonInstallationKey, size: Option) -> usize; - fn on_download_progress(&self, id: usize, inc: u64); - fn on_download_complete(&self); + fn on_request_start( + &self, + direction: Direction, + name: &PythonInstallationKey, + size: Option, + ) -> usize; + fn on_request_progress(&self, id: usize, inc: u64); + fn on_request_complete(&self, direction: Direction, id: usize); } /// An asynchronous reader that reports progress as bytes are read. @@ -943,7 +1113,7 @@ where .poll_read(cx, buf) .map_ok(|()| { self.reporter - .on_download_progress(self.index, buf.filled().len() as u64); + .on_request_progress(self.index, buf.filled().len() as u64); }) } } @@ -951,7 +1121,7 @@ where /// Convert a [`Url`] into an [`AsyncRead`] stream. async fn read_url( url: &Url, - client: &uv_client::BaseClient, + client: &BaseClient, ) -> Result<(impl AsyncRead + Unpin, Option), Error> { if url.scheme() == "file" { // Loads downloaded distribution from the given `file://` URL. diff --git a/crates/uv-static/src/env_vars.rs b/crates/uv-static/src/env_vars.rs index 388e934ae..58191fe64 100644 --- a/crates/uv-static/src/env_vars.rs +++ b/crates/uv-static/src/env_vars.rs @@ -270,6 +270,10 @@ impl EnvVars { /// Note that currently, only local paths are supported. pub const UV_PYTHON_DOWNLOADS_JSON_URL: &'static str = "UV_PYTHON_DOWNLOADS_JSON_URL"; + /// Specifies the directory for caching the archives of managed Python installations before + /// installation. + pub const UV_PYTHON_CACHE_DIR: &'static str = "UV_PYTHON_CACHE_DIR"; + /// Managed Python installations are downloaded from the Astral /// [`python-build-standalone`](https://github.com/astral-sh/python-build-standalone) project. /// diff --git a/crates/uv/src/commands/python/install.rs b/crates/uv/src/commands/python/install.rs index a5c3b9555..ffd8d7e63 100644 --- a/crates/uv/src/commands/python/install.rs +++ b/crates/uv/src/commands/python/install.rs @@ -218,7 +218,7 @@ pub(crate) async fn install( for installation in matching_installations { changelog.existing.insert(installation.key().clone()); if matches!(&request.request, &PythonRequest::Any) { - // Construct a install request matching the existing installation + // Construct an install request matching the existing installation match InstallRequest::new(PythonRequest::Key(installation.into())) { Ok(request) => { debug!("Will reinstall `{}`", installation.key().green()); diff --git a/crates/uv/src/commands/reporters.rs b/crates/uv/src/commands/reporters.rs index e27286eea..70cd8fcec 100644 --- a/crates/uv/src/commands/reporters.rs +++ b/crates/uv/src/commands/reporters.rs @@ -69,6 +69,26 @@ impl BarState { enum Direction { Upload, Download, + Extract, +} + +impl Direction { + fn as_str(&self) -> &str { + match self { + Direction::Download => "Downloading", + Direction::Upload => "Uploading", + Direction::Extract => "Extracting", + } + } +} + +impl From for Direction { + fn from(dir: uv_python::downloads::Direction) -> Self { + match dir { + uv_python::downloads::Direction::Download => Self::Download, + uv_python::downloads::Direction::Extract => Self::Extract, + } + } } impl ProgressReporter { @@ -191,6 +211,7 @@ impl ProgressReporter { match direction { Direction::Download => "Downloading", Direction::Upload => "Uploading", + Direction::Extract => "Extracting", } .bold() .cyan(), @@ -205,12 +226,7 @@ impl ProgressReporter { let _ = writeln!( self.printer.stderr(), "{} {}", - match direction { - Direction::Download => "Downloading", - Direction::Upload => "Uploading", - } - .bold() - .cyan(), + direction.as_str().bold().cyan(), name ); } @@ -251,12 +267,7 @@ impl ProgressReporter { let _ = writeln!( self.printer.stderr(), " {} {}", - match direction { - Direction::Download => "Downloaded", - Direction::Upload => "Uploaded", - } - .bold() - .green(), + direction.as_str().bold().green(), progress.message() ); } @@ -583,21 +594,22 @@ impl PythonDownloadReporter { } impl uv_python::downloads::Reporter for PythonDownloadReporter { - fn on_progress(&self, _name: &PythonInstallationKey, id: usize) { - self.reporter.on_download_complete(id); + fn on_request_start( + &self, + direction: uv_python::downloads::Direction, + name: &PythonInstallationKey, + size: Option, + ) -> usize { + self.reporter + .on_request_start(direction.into(), format!("{name} ({direction})"), size) } - fn on_download_start(&self, name: &PythonInstallationKey, size: Option) -> usize { - self.reporter.on_download_start(name.to_string(), size) + fn on_request_progress(&self, id: usize, inc: u64) { + self.reporter.on_request_progress(id, inc); } - fn on_download_progress(&self, id: usize, inc: u64) { - self.reporter.on_download_progress(id, inc); - } - - fn on_download_complete(&self) { - self.reporter.root.set_message(""); - self.reporter.root.finish_and_clear(); + fn on_request_complete(&self, direction: uv_python::downloads::Direction, id: usize) { + self.reporter.on_request_complete(direction.into(), id); } } diff --git a/crates/uv/tests/it/python_install.rs b/crates/uv/tests/it/python_install.rs index 6f69ff3d2..9129ce163 100644 --- a/crates/uv/tests/it/python_install.rs +++ b/crates/uv/tests/it/python_install.rs @@ -1,4 +1,4 @@ -use std::{path::Path, process::Command}; +use std::{env, path::Path, process::Command}; use crate::common::{uv_snapshot, TestContext}; use assert_fs::{ @@ -6,6 +6,7 @@ use assert_fs::{ prelude::{FileTouch, PathChild, PathCreateDir}, }; use predicates::prelude::predicate; +use tracing::debug; use uv_fs::Simplified; use uv_static::EnvVars; @@ -1274,3 +1275,98 @@ fn python_install_314() { ----- stderr ----- "); } + +/// Test caching Python archives with `UV_PYTHON_CACHE_DIR`. +#[test] +fn python_install_cached() { + // It does not make sense to run this test when the developer selected faster test runs + // by setting the env var. + if env::var_os("UV_PYTHON_CACHE_DIR").is_some() { + debug!("Skipping test because UV_PYTHON_CACHE_DIR is set"); + return; + } + + let context: TestContext = TestContext::new_with_versions(&[]) + .with_filtered_python_keys() + .with_filtered_exe_suffix() + .with_managed_python_dirs(); + + let python_cache = context.temp_dir.child("python-cache"); + + // Install the latest version + uv_snapshot!(context.filters(), context + .python_install() + .env(EnvVars::UV_PYTHON_CACHE_DIR, python_cache.as_ref()), @r" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Installed Python 3.13.3 in [TIME] + + cpython-3.13.3-[PLATFORM] + "); + + let bin_python = context + .bin_dir + .child(format!("python3.13{}", std::env::consts::EXE_SUFFIX)); + + // The executable should not be installed in the bin directory (requires preview) + bin_python.assert(predicate::path::missing()); + + // Should be a no-op when already installed + uv_snapshot!(context.filters(), context + .python_install() + .env(EnvVars::UV_PYTHON_CACHE_DIR, python_cache.as_ref()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Python is already installed. Use `uv python install ` to install another version. + "###); + + uv_snapshot!(context.filters(), context.python_uninstall().arg("3.13"), @r" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Searching for Python versions matching: Python 3.13 + Uninstalled Python 3.13.3 in [TIME] + - cpython-3.13.3-[PLATFORM] + "); + + // The cached archive can be installed offline + uv_snapshot!(context.filters(), context + .python_install() + .arg("--offline") + .env(EnvVars::UV_PYTHON_CACHE_DIR, python_cache.as_ref()), @r" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Installed Python 3.13.3 in [TIME] + + cpython-3.13.3-[PLATFORM] + "); + + // 3.12 isn't cached, so it can't be installed + let mut filters = context.filters(); + filters.push(( + "cpython-3.12.10.*.tar.gz", + "cpython-3.12.10[DATE]-[PLATFORM].tar.gz", + )); + uv_snapshot!(filters, context + .python_install() + .arg("3.12") + .arg("--offline") + .env(EnvVars::UV_PYTHON_CACHE_DIR, python_cache.as_ref()), @r" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + error: Failed to install cpython-3.12.10-[PLATFORM] + Caused by: An offline Python installation was requested, but cpython-3.12.10[DATE]-[PLATFORM].tar.gz) is missing in python-cache + "); +} diff --git a/docs/configuration/environment.md b/docs/configuration/environment.md index 08a9d5adf..f86f52bca 100644 --- a/docs/configuration/environment.md +++ b/docs/configuration/environment.md @@ -339,6 +339,11 @@ this Python interpreter for all operations. Specifies the directory to place links to installed, managed Python executables. +### `UV_PYTHON_CACHE_DIR` + +Specifies the directory for caching the archives of managed Python installations before +installation. + ### `UV_PYTHON_DOWNLOADS` Equivalent to the