uv/crates/uv-cache/src/lib.rs
use std::fmt::{Display, Formatter};
use std::io;
use std::io::Write;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use rustc_hash::FxHashSet;
use tracing::debug;
pub use archive::ArchiveId;
use uv_cache_info::Timestamp;
use uv_distribution_filename::WheelFilename;
use uv_fs::{cachedir, directories, LockedFile};
use uv_normalize::PackageName;
use uv_pypi_types::ResolutionMetadata;
pub use crate::by_timestamp::CachedByTimestamp;
#[cfg(feature = "clap")]
pub use crate::cli::CacheArgs;
use crate::removal::Remover;
pub use crate::removal::{rm_rf, Removal};
pub use crate::wheel::WheelCache;
use crate::wheel::WheelCacheKind;
mod archive;
mod by_timestamp;
#[cfg(feature = "clap")]
mod cli;
mod removal;
mod wheel;
/// The version of the archive bucket.
///
/// Must be kept in-sync with the version in [`CacheBucket::to_str`].
pub const ARCHIVE_VERSION: u8 = 0;
/// A [`CacheEntry`] which may or may not exist yet.
#[derive(Debug, Clone)]
pub struct CacheEntry(PathBuf);
impl CacheEntry {
/// Create a new [`CacheEntry`] from a directory and a file name.
pub fn new(dir: impl Into<PathBuf>, file: impl AsRef<Path>) -> Self {
Self(dir.into().join(file))
}
/// Create a new [`CacheEntry`] from a path.
pub fn from_path(path: impl Into<PathBuf>) -> Self {
Self(path.into())
}
/// Return the cache entry's parent directory.
pub fn shard(&self) -> CacheShard {
CacheShard(self.dir().to_path_buf())
}
/// Convert the [`CacheEntry`] into a [`PathBuf`].
#[inline]
pub fn into_path_buf(self) -> PathBuf {
self.0
}
/// Return the path to the [`CacheEntry`].
#[inline]
pub fn path(&self) -> &Path {
&self.0
}
/// Return the cache entry's parent directory.
#[inline]
pub fn dir(&self) -> &Path {
self.0.parent().expect("Cache entry has no parent")
}
/// Create a new [`CacheEntry`] with the given file name.
#[must_use]
pub fn with_file(&self, file: impl AsRef<Path>) -> Self {
Self(self.dir().join(file))
}
/// Acquire the [`CacheEntry`] as an exclusive lock.
pub async fn lock(&self) -> Result<LockedFile, io::Error> {
fs_err::create_dir_all(self.dir())?;
LockedFile::acquire(self.path(), self.path().display()).await
}
}
impl AsRef<Path> for CacheEntry {
fn as_ref(&self) -> &Path {
&self.0
}
}
/// A subdirectory within the cache.
#[derive(Debug, Clone)]
pub struct CacheShard(PathBuf);
impl CacheShard {
/// Return a [`CacheEntry`] within this shard.
pub fn entry(&self, file: impl AsRef<Path>) -> CacheEntry {
CacheEntry::new(&self.0, file)
}
/// Return a [`CacheShard`] within this shard.
#[must_use]
pub fn shard(&self, dir: impl AsRef<Path>) -> Self {
Self(self.0.join(dir.as_ref()))
}
/// Acquire the cache shard as an exclusive lock.
pub async fn lock(&self) -> Result<LockedFile, io::Error> {
fs_err::create_dir_all(self.as_ref())?;
LockedFile::acquire(self.join(".lock"), self.display()).await
}
/// Return the [`CacheShard`] as a [`PathBuf`].
pub fn into_path_buf(self) -> PathBuf {
self.0
}
}
impl AsRef<Path> for CacheShard {
fn as_ref(&self) -> &Path {
&self.0
}
}
impl Deref for CacheShard {
type Target = Path;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// The main cache abstraction.
#[derive(Debug, Clone)]
pub struct Cache {
/// The cache directory.
root: PathBuf,
/// The refresh strategy to use when reading from the cache.
refresh: Refresh,
/// A temporary cache directory, if the user requested `--no-cache`.
///
/// Included to ensure that the temporary directory exists for the length of the operation, but
/// is dropped at the end as appropriate.
temp_dir: Option<Arc<tempfile::TempDir>>,
}
impl Cache {
/// A persistent cache directory at `root`.
pub fn from_path(root: impl Into<PathBuf>) -> Self {
Self {
root: root.into(),
refresh: Refresh::None(Timestamp::now()),
temp_dir: None,
}
}
/// Create a temporary cache directory.
pub fn temp() -> Result<Self, io::Error> {
let temp_dir = tempfile::tempdir()?;
Ok(Self {
root: temp_dir.path().to_path_buf(),
refresh: Refresh::None(Timestamp::now()),
temp_dir: Some(Arc::new(temp_dir)),
})
}
/// Set the [`Refresh`] policy for the cache.
#[must_use]
pub fn with_refresh(self, refresh: Refresh) -> Self {
Self { refresh, ..self }
}
/// Return the root of the cache.
pub fn root(&self) -> &Path {
&self.root
}
/// Return the [`Refresh`] policy for the cache.
pub fn refresh(&self) -> &Refresh {
&self.refresh
}
/// The folder for a specific cache bucket
pub fn bucket(&self, cache_bucket: CacheBucket) -> PathBuf {
self.root.join(cache_bucket.to_str())
}
/// Compute a shard within the cache.
pub fn shard(&self, cache_bucket: CacheBucket, dir: impl AsRef<Path>) -> CacheShard {
CacheShard(self.bucket(cache_bucket).join(dir.as_ref()))
}
/// Compute an entry in the cache.
pub fn entry(
&self,
cache_bucket: CacheBucket,
dir: impl AsRef<Path>,
file: impl AsRef<Path>,
) -> CacheEntry {
CacheEntry::new(self.bucket(cache_bucket).join(dir), file)
}
/// Return the path to an archive in the cache.
pub fn archive(&self, id: &ArchiveId) -> PathBuf {
self.bucket(CacheBucket::Archive).join(id)
}
/// Create a temporary directory to be used as a Python virtual environment.
pub fn venv_dir(&self) -> io::Result<tempfile::TempDir> {
fs_err::create_dir_all(self.bucket(CacheBucket::Builds))?;
tempfile::tempdir_in(self.bucket(CacheBucket::Builds))
}
/// Create a temporary directory to be used for executing PEP 517 source distribution builds.
pub fn build_dir(&self) -> io::Result<tempfile::TempDir> {
fs_err::create_dir_all(self.bucket(CacheBucket::Builds))?;
tempfile::tempdir_in(self.bucket(CacheBucket::Builds))
}
/// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy.
pub fn must_revalidate_package(&self, package: &PackageName) -> bool {
match &self.refresh {
Refresh::None(_) => false,
Refresh::All(_) => true,
Refresh::Packages(packages, _, _) => packages.contains(package),
}
}
/// Returns `true` if a cache entry must be revalidated given the [`Refresh`] policy.
pub fn must_revalidate_path(&self, path: &Path) -> bool {
match &self.refresh {
Refresh::None(_) => false,
Refresh::All(_) => true,
Refresh::Packages(_, paths, _) => paths
.iter()
.any(|target| same_file::is_same_file(path, target).unwrap_or(false)),
}
}
/// Returns the [`Freshness`] for a cache entry, validating it against the [`Refresh`] policy.
///
/// A cache entry is considered fresh if it was created after the cache itself was
/// initialized, or if the [`Refresh`] policy does not require revalidation.
pub fn freshness(
&self,
entry: &CacheEntry,
package: Option<&PackageName>,
path: Option<&Path>,
) -> io::Result<Freshness> {
// Grab the cutoff timestamp, if it's relevant.
let timestamp = match &self.refresh {
Refresh::None(_) => return Ok(Freshness::Fresh),
Refresh::All(timestamp) => timestamp,
Refresh::Packages(packages, paths, timestamp) => {
if package.is_none_or(|package| packages.contains(package))
|| path.is_some_and(|path| {
paths
.iter()
.any(|target| same_file::is_same_file(path, target).unwrap_or(false))
})
{
timestamp
} else {
return Ok(Freshness::Fresh);
}
}
};
match fs_err::metadata(entry.path()) {
Ok(metadata) => {
if Timestamp::from_metadata(&metadata) >= *timestamp {
Ok(Freshness::Fresh)
} else {
Ok(Freshness::Stale)
}
}
Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(Freshness::Missing),
Err(err) => Err(err),
}
}
/// Persist a temporary directory to the artifact store, returning its unique ID.
pub async fn persist(
&self,
temp_dir: impl AsRef<Path>,
path: impl AsRef<Path>,
) -> io::Result<ArchiveId> {
// Create a unique ID for the artifact.
// TODO(charlie): Support content-addressed persistence via SHAs.
let id = ArchiveId::new();
// Move the temporary directory into the directory store.
let archive_entry = self.entry(CacheBucket::Archive, "", &id);
fs_err::create_dir_all(archive_entry.dir())?;
uv_fs::rename_with_retry(temp_dir.as_ref(), archive_entry.path()).await?;
// Create a symlink to the directory store.
fs_err::create_dir_all(path.as_ref().parent().expect("Cache entry to have parent"))?;
self.create_link(&id, path.as_ref())?;
Ok(id)
}
/// Returns `true` if the [`Cache`] is temporary.
pub fn is_temporary(&self) -> bool {
self.temp_dir.is_some()
}
/// Initialize the [`Cache`].
pub fn init(self) -> Result<Self, io::Error> {
let root = &self.root;
// Create the cache directory, if it doesn't exist.
fs_err::create_dir_all(root)?;
// Add the CACHEDIR.TAG.
cachedir::ensure_tag(root)?;
// Add the .gitignore.
match fs_err::OpenOptions::new()
.write(true)
.create_new(true)
.open(root.join(".gitignore"))
{
Ok(mut file) => file.write_all(b"*")?,
Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
Err(err) => return Err(err),
}
// Add an empty .gitignore to the build bucket, to ensure that the cache's own .gitignore
// doesn't interfere with source distribution builds. Build backends (like hatchling) will
// traverse upwards to look for .gitignore files.
fs_err::create_dir_all(root.join(CacheBucket::SourceDistributions.to_str()))?;
match fs_err::OpenOptions::new()
.write(true)
.create_new(true)
.open(
root.join(CacheBucket::SourceDistributions.to_str())
.join(".gitignore"),
) {
Ok(_) => {}
Err(err) if err.kind() == io::ErrorKind::AlreadyExists => (),
Err(err) => return Err(err),
}
// Add a phony .git, if it doesn't exist, to ensure that the cache isn't considered to be
// part of a Git repository. (Some packages will include Git metadata, like a hash, in the
// built version if they're in a Git repository, but the cache should be viewed as an
// isolated store.)
// We have to put this below the gitignore. Otherwise, if the build backend uses the Rust
// `ignore` crate, it will walk up to the top-level .gitignore and ignore its Python source
// files.
fs_err::OpenOptions::new().create(true).write(true).open(
root.join(CacheBucket::SourceDistributions.to_str())
.join(".git"),
)?;
Ok(Self {
root: std::path::absolute(root)?,
..self
})
}
/// Clear the cache, removing all entries.
pub fn clear(&self, reporter: Box<dyn CleanReporter>) -> Result<Removal, io::Error> {
Remover::new(reporter).rm_rf(&self.root)
}
/// Remove a package from the cache.
///
/// Returns the number of entries removed from the cache.
pub fn remove(&self, name: &PackageName) -> Result<Removal, io::Error> {
// Collect the set of referenced archives.
let before = self.find_archive_references()?;
// Remove any entries for the package from the cache.
let mut summary = Removal::default();
for bucket in CacheBucket::iter() {
summary += bucket.remove(self, name)?;
}
// Collect the set of referenced archives after the removal.
let after = self.find_archive_references()?;
if before != after {
// Remove any archives that are no longer referenced.
for entry in fs_err::read_dir(self.bucket(CacheBucket::Archive))? {
let entry = entry?;
let path = fs_err::canonicalize(entry.path())?;
if !after.contains(&path) && before.contains(&path) {
debug!("Removing dangling cache entry: {}", path.display());
summary += rm_rf(path)?;
}
}
}
Ok(summary)
}
/// Run the garbage collector on the cache, removing any dangling entries.
pub fn prune(&self, ci: bool) -> Result<Removal, io::Error> {
let mut summary = Removal::default();
// First, remove any top-level directories that are unused. These typically represent
// outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
for entry in fs_err::read_dir(&self.root)? {
let entry = entry?;
let metadata = entry.metadata()?;
if entry.file_name() == "CACHEDIR.TAG"
|| entry.file_name() == ".gitignore"
|| entry.file_name() == ".git"
{
continue;
}
if metadata.is_dir() {
// If the directory is not a cache bucket, remove it.
if CacheBucket::iter().all(|bucket| entry.file_name() != bucket.to_str()) {
let path = entry.path();
debug!("Removing dangling cache bucket: {}", path.display());
summary += rm_rf(path)?;
}
} else {
// If the file is not a marker file, remove it.
let path = entry.path();
debug!("Removing dangling cache bucket: {}", path.display());
summary += rm_rf(path)?;
}
}
// Second, remove any cached environments. These are never referenced by symlinks, so we can
// remove them directly.
match fs_err::read_dir(self.bucket(CacheBucket::Environments)) {
Ok(entries) => {
for entry in entries {
let entry = entry?;
let path = fs_err::canonicalize(entry.path())?;
debug!("Removing dangling cache environment: {}", path.display());
summary += rm_rf(path)?;
}
}
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
Err(err) => return Err(err),
}
// Third, if enabled, remove all unzipped wheels, leaving only the wheel archives.
if ci {
// Remove the entire pre-built wheel cache, since every entry is an unzipped wheel.
match fs_err::read_dir(self.bucket(CacheBucket::Wheels)) {
Ok(entries) => {
for entry in entries {
let entry = entry?;
let path = fs_err::canonicalize(entry.path())?;
if path.is_dir() {
debug!("Removing unzipped wheel entry: {}", path.display());
summary += rm_rf(path)?;
}
}
}
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
Err(err) => return Err(err),
}
for entry in walkdir::WalkDir::new(self.bucket(CacheBucket::SourceDistributions)) {
let entry = entry?;
// If the directory contains a `metadata.msgpack`, then it's a built wheel revision.
if !entry.file_type().is_dir() {
continue;
}
if !entry.path().join("metadata.msgpack").exists() {
continue;
}
// Remove everything except the built wheel archive and the metadata.
for entry in fs_err::read_dir(entry.path())? {
let entry = entry?;
let path = entry.path();
// Retain the resolved metadata (`metadata.msgpack`).
if path
.file_name()
.is_some_and(|file_name| file_name == "metadata.msgpack")
{
continue;
}
// Retain any built wheel archives.
if path
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("whl"))
{
continue;
}
debug!("Removing unzipped built wheel entry: {}", path.display());
summary += rm_rf(path)?;
}
}
}
// Fourth, remove any unused archives (by searching for archives that are not symlinked).
let references = self.find_archive_references()?;
match fs_err::read_dir(self.bucket(CacheBucket::Archive)) {
Ok(entries) => {
for entry in entries {
let entry = entry?;
let path = fs_err::canonicalize(entry.path())?;
if !references.contains(&path) {
debug!("Removing dangling cache archive: {}", path.display());
summary += rm_rf(path)?;
}
}
}
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
Err(err) => return Err(err),
}
Ok(summary)
}
/// Find all references to entries in the archive bucket.
///
/// Archive entries are often referenced by symlinks in other cache buckets. This method
/// searches for all such references.
fn find_archive_references(&self) -> Result<FxHashSet<PathBuf>, io::Error> {
let mut references = FxHashSet::default();
for bucket in CacheBucket::iter() {
let bucket_path = self.bucket(bucket);
if bucket_path.is_dir() {
for entry in walkdir::WalkDir::new(bucket_path) {
let entry = entry?;
// Ignore any `.lock` files.
if entry
.path()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("lock"))
{
continue;
}
let Some(filename) = entry
.path()
.file_name()
.and_then(|file_name| file_name.to_str())
else {
continue;
};
if bucket == CacheBucket::Wheels {
// In the `wheels` bucket, we often use a hash of the filename as the
// directory name, so we can't rely on the stem.
//
// Instead, we skip if it contains an extension (e.g., `.whl`, `.http`,
// `.rev`, and `.msgpack` files).
if filename
.rsplit_once('-') // strip version/tags, might contain a dot ('.')
.is_none_or(|(_, suffix)| suffix.contains('.'))
{
continue;
}
} else {
// For other buckets only include entries that match the wheel stem pattern (e.g., `typing-extensions-4.8.0-py3-none-any`).
if WheelFilename::from_stem(filename).is_err() {
continue;
}
}
if let Ok(target) = self.resolve_link(entry.path()) {
references.insert(target);
}
}
}
}
Ok(references)
}
/// Create a link to a directory in the archive bucket.
///
/// On Windows, we write structured data ([`Link`]) to a file containing the archive ID and
/// version. On Unix, we create a symlink to the target directory.
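///
/// For instance, with the current `ARCHIVE_VERSION` of 0, the contents of a link file
/// might look like the following (the archive ID shown here is hypothetical):
///
/// ```text
/// archive-v0/e2DMTCbAQBW2YdfnZX9Pj
/// ```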
#[cfg(windows)]
pub fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
// Serialize the link.
let link = Link::new(id.clone());
let contents = link.to_string();
// First, attempt to create a file at the location, but fail if it already exists.
match fs_err::OpenOptions::new()
.write(true)
.create_new(true)
.open(dst.as_ref())
{
Ok(mut file) => {
// Write the target path to the file.
file.write_all(contents.as_bytes())?;
Ok(())
}
Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
// Write to a temporary file, then move it into place.
let temp_dir = tempfile::tempdir_in(dst.as_ref().parent().unwrap())?;
let temp_file = temp_dir.path().join("link");
fs_err::write(&temp_file, contents.as_bytes())?;
// Move the symlink into the target location.
fs_err::rename(&temp_file, dst.as_ref())?;
Ok(())
}
Err(err) => Err(err),
}
}
/// Resolve an archive link, returning the fully-resolved path.
///
/// Returns an error if the link target does not exist.
#[cfg(windows)]
pub fn resolve_link(&self, path: impl AsRef<Path>) -> io::Result<PathBuf> {
// Deserialize the link.
let contents = fs_err::read_to_string(path.as_ref())?;
let link = Link::from_str(&contents)?;
// Ignore stale links.
if link.version != ARCHIVE_VERSION {
return Err(io::Error::new(
io::ErrorKind::NotFound,
"The link target does not exist.",
));
}
// Reconstruct the path.
let path = self.archive(&link.id);
path.canonicalize()
}
/// Create a link to a directory in the archive bucket.
///
/// On Windows, we write structured data ([`Link`]) to a file containing the archive ID and
/// version. On Unix, we create a symlink to the target directory.
#[cfg(unix)]
pub fn create_link(&self, id: &ArchiveId, dst: impl AsRef<Path>) -> io::Result<()> {
// Construct the link target.
let src = self.archive(id);
let dst = dst.as_ref();
// Attempt to create the symlink directly.
match fs_err::os::unix::fs::symlink(&src, dst) {
Ok(()) => Ok(()),
Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
// Create a symlink, using a temporary file to ensure atomicity.
let temp_dir = tempfile::tempdir_in(dst.parent().unwrap())?;
let temp_file = temp_dir.path().join("link");
fs_err::os::unix::fs::symlink(&src, &temp_file)?;
// Move the symlink into the target location.
fs_err::rename(&temp_file, dst)?;
Ok(())
}
Err(err) => Err(err),
}
}
/// Resolve an archive link, returning the fully-resolved path.
///
/// Returns an error if the link target does not exist.
#[cfg(unix)]
pub fn resolve_link(&self, path: impl AsRef<Path>) -> io::Result<PathBuf> {
path.as_ref().canonicalize()
}
}
/// An archive (unzipped wheel) that exists in the local cache.
#[derive(Debug, Clone)]
#[allow(unused)]
struct Link {
/// The unique ID of the entry in the archive bucket.
id: ArchiveId,
/// The version of the archive bucket.
version: u8,
}
#[allow(unused)]
impl Link {
/// Create a new [`Link`] with the given ID.
fn new(id: ArchiveId) -> Self {
Self {
id,
version: ARCHIVE_VERSION,
}
}
}
impl Display for Link {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "archive-v{}/{}", self.version, self.id)
}
}
impl FromStr for Link {
type Err = io::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut parts = s.splitn(2, '/');
let version = parts
.next()
.filter(|s| !s.is_empty())
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version"))?;
let id = parts
.next()
.filter(|s| !s.is_empty())
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing ID"))?;
// Parse the archive version from `archive-v{version}/{id}`.
let version = version
.strip_prefix("archive-v")
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing version prefix"))?;
let version = u8::from_str(version).map_err(|err| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("failed to parse version: {err}"),
)
})?;
// Parse the ID from `archive-v{version}/{id}`.
let id = ArchiveId::from_str(id).map_err(|err| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("failed to parse ID: {err}"),
)
})?;
Ok(Self { id, version })
}
}
pub trait CleanReporter: Send + Sync {
/// Called after one file or directory is removed.
fn on_clean(&self);
/// Called after all files and directories are removed.
fn on_complete(&self);
}
/// The different kinds of data in the cache are stored in different buckets, which in our case
/// are subdirectories of the cache root.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum CacheBucket {
/// Wheels (excluding built wheels), alongside their metadata and cache policy.
///
/// There are three kinds of cache entries: wheel metadata and policy as `MsgPack` files, the
/// wheels themselves, and the unzipped wheel archives. If a wheel file is over an in-memory
/// size threshold, we first download the zip file into the cache, then unzip it into a
/// directory with the same name (exclusive of the `.whl` extension).
///
/// Cache structure:
/// * `wheel-metadata-v0/pypi/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
/// * `wheel-metadata-v0/<digest(index-url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
/// * `wheel-metadata-v0/url/<digest(url)>/foo/{foo-1.0.0-py3-none-any.msgpack, foo-1.0.0-py3-none-any.whl}`
///
/// See `uv_client::RegistryClient::wheel_metadata` for information on how wheel metadata
/// is fetched.
///
/// # Example
///
/// Consider the following `requirements.in`:
/// ```text
/// # pypi wheel
/// pandas
/// # url wheel
/// flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
/// ```
///
/// When we run `pip compile`, it only fetches and caches the metadata (and cache policy); it
/// doesn't need the actual wheels yet:
/// ```text
/// wheel-v0
/// ├── pypi
/// │   ...
/// │   ├── pandas
/// │   │   └── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
/// │   ...
/// └── url
///     └── 4b8be67c801a7ecb
///         └── flask
///             └── flask-3.0.0-py3-none-any.msgpack
/// ```
///
/// We get the following `requirements.txt` from `pip compile`:
///
/// ```text
/// [...]
/// flask @ https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0-py3-none-any.whl
/// [...]
/// pandas==2.1.3
/// [...]
/// ```
///
/// If we run `pip sync` on `requirements.txt` on a different machine, it also fetches the
/// wheels:
///
/// TODO(konstin): This is still wrong, we need to store the cache policy too!
/// ```text
/// wheel-v0
/// ├── pypi
/// │   ...
/// │   ├── pandas
/// │   │   ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
/// │   │   ├── pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64
/// │   ...
/// └── url
///     └── 4b8be67c801a7ecb
///         └── flask
///             └── flask-3.0.0-py3-none-any.whl
///                 ├── flask
///                 │   └── ...
///                 └── flask-3.0.0.dist-info
///                     └── ...
/// ```
///
/// If we first run `pip compile` and then `pip sync` on the same machine, we get both:
///
/// ```text
/// wheels-v0
/// ├── pypi
/// │   ├── ...
/// │   ├── pandas
/// │   │   ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.msgpack
/// │   │   ├── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
/// │   │   └── pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64
/// │   │       ├── pandas
/// │   │       │   ├── ...
/// │   │       ├── pandas-2.1.3.dist-info
/// │   │       │   ├── ...
/// │   │       └── pandas.libs
/// │   ├── ...
/// └── url
///     └── 4b8be67c801a7ecb
///         └── flask
///             ├── flask-3.0.0-py3-none-any.msgpack
///             ├── flask-3.0.0-py3-none-any.whl
///             └── flask-3.0.0-py3-none-any
///                 ├── flask
///                 │   └── ...
///                 └── flask-3.0.0.dist-info
///                     └── ...
/// ```
Wheels,
/// Source distributions, wheels built from source distributions, their extracted metadata, and the
/// cache policy of the source distribution.
///
/// The structure is similar to that of the `Wheels` bucket, except we have an additional layer
/// for the source distribution filename and the metadata is at the source distribution level,
/// not at the wheel level.
///
/// TODO(konstin): The cache policy should be on the source distribution level, the metadata we
/// can put next to the wheels as in the `Wheels` bucket.
///
/// The unzipped source distribution is stored in a directory matching the source distribution
/// archive name.
///
/// Source distributions are built into zipped wheel files (as PEP 517 specifies) and unzipped
/// lazily before installing. So when resolving, we only build the wheel and store the archive
/// file in the cache; when installing, we unpack it under the same name (exclusive of the
/// `.whl` extension). You may find a mix of wheel archive zip files and unzipped wheel
/// directories in the cache.
///
/// Cache structure:
/// * `built-wheels-v0/pypi/foo/34a17436ed1e9669/{manifest.msgpack, metadata.msgpack, foo-1.0.0.zip, foo-1.0.0-py3-none-any.whl, ...other wheels}`
/// * `built-wheels-v0/<digest(index-url)>/foo/foo-1.0.0.zip/{manifest.msgpack, metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
/// * `built-wheels-v0/url/<digest(url)>/foo/foo-1.0.0.zip/{manifest.msgpack, metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
/// * `built-wheels-v0/git/<digest(url)>/<git sha>/foo/foo-1.0.0.zip/{metadata.msgpack, foo-1.0.0-py3-none-any.whl, ...other wheels}`
///
/// However, the URL filename does not need to be a valid source distribution filename
/// (<https://github.com/search?q=path%3A**%2Frequirements.txt+master.zip&type=code>),
/// so we have to accept any arbitrary string as the filename; the entry could also be:
/// * `built-wheels-v0/url/<sha256(url)>/master.zip/metadata.msgpack`
///
/// # Example
///
/// The following requirements:
/// ```text
/// # git source dist
/// pydantic-extra-types @ git+https://github.com/pydantic/pydantic-extra-types.git
/// # pypi source dist
/// django_allauth==0.51.0
/// # url source dist
/// werkzeug @ https://files.pythonhosted.org/packages/0d/cc/ff1904eb5eb4b455e442834dabf9427331ac0fa02853bf83db817a7dd53d/werkzeug-3.0.1.tar.gz
/// ```
///
/// ...may be cached as:
/// ```text
/// built-wheels-v4/
/// ├── git
/// │   └── 2122faf3e081fb7a
/// │       └── 7a2d650a4a7b4d04
/// │           ├── metadata.msgpack
/// │           └── pydantic_extra_types-2.9.0-py3-none-any.whl
/// ├── pypi
/// │   └── django-allauth
/// │       └── 0.51.0
/// │           ├── 0gH-_fwv8tdJ7JwwjJsUc
/// │           │   ├── django-allauth-0.51.0.tar.gz
/// │           │   │   └── [UNZIPPED CONTENTS]
/// │           │   ├── django_allauth-0.51.0-py3-none-any.whl
/// │           │   └── metadata.msgpack
/// │           └── revision.http
/// └── url
///     └── 6781bd6440ae72c2
///         ├── APYY01rbIfpAo_ij9sCY6
///         │   ├── metadata.msgpack
///         │   ├── werkzeug-3.0.1-py3-none-any.whl
///         │   └── werkzeug-3.0.1.tar.gz
///         │       └── [UNZIPPED CONTENTS]
///         └── revision.http
/// ```
///
/// Structurally, the `manifest.msgpack` is empty, and only contains the caching information
/// needed to invalidate the cache. The `metadata.msgpack` contains the metadata of the source
/// distribution.
SourceDistributions,
/// Flat index responses, a format very similar to the simple metadata API.
///
/// Cache structure:
/// * `flat-index-v0/index/<digest(flat_index_url)>.msgpack`
///
/// The response is stored as `Vec<File>`.
FlatIndex,
/// Git repositories.
Git,
/// Information about an interpreter at a path.
///
/// To avoid caching pyenv shims (bash scripts that may redirect to a new Python version
/// without the shim itself changing), we only cache when the path equals `sys.executable`,
/// i.e., when the path we're running is the Python executable itself and not a shim.
///
/// Cache structure: `interpreter-v0/<digest(path)>.msgpack`
///
/// # Example
///
/// Each of the `MsgPack` files has a timestamp field in Unix time, the [PEP 508]
/// markers, and some information from the `sys`/`sysconfig` modules.
///
/// ```json
/// {
///     "timestamp": 1698047994491,
///     "data": {
///         "markers": {
///             "implementation_name": "cpython",
///             "implementation_version": "3.12.0",
///             "os_name": "posix",
///             "platform_machine": "x86_64",
///             "platform_python_implementation": "CPython",
///             "platform_release": "6.5.0-13-generic",
///             "platform_system": "Linux",
///             "platform_version": "#13-Ubuntu SMP PREEMPT_DYNAMIC Fri Nov 3 12:16:05 UTC 2023",
///             "python_full_version": "3.12.0",
///             "python_version": "3.12",
///             "sys_platform": "linux"
///         },
///         "base_exec_prefix": "/home/ferris/.pyenv/versions/3.12.0",
///         "base_prefix": "/home/ferris/.pyenv/versions/3.12.0",
///         "sys_executable": "/home/ferris/projects/uv/.venv/bin/python"
///     }
/// }
/// ```
///
/// [PEP 508]: https://peps.python.org/pep-0508/#environment-markers
Interpreter,
/// Index responses through the simple metadata API.
///
/// Cache structure:
/// * `simple-v0/pypi/<package_name>.rkyv`
/// * `simple-v0/<digest(index_url)>/<package_name>.rkyv`
///
/// The response is parsed into `uv_client::SimpleMetadata` before storage.
Simple,
/// A cache of unzipped wheels, stored as directories. This is used internally within the cache.
/// When other buckets need to store directories, they should persist them to
/// [`CacheBucket::Archive`], and then symlink them into the appropriate bucket. This ensures
/// that cache entries can be atomically replaced and removed, as storing directories in the
/// other buckets directly would make atomic operations impossible.
Archive,
/// Ephemeral virtual environments used to execute PEP 517 builds and other operations.
Builds,
/// Reusable virtual environments used to invoke Python tools.
Environments,
}
impl CacheBucket {
fn to_str(self) -> &'static str {
match self {
// Note that when bumping this, you'll also need to bump it
// in `crates/uv/tests/it/cache_prune.rs`.
Self::SourceDistributions => "sdists-v9",
Self::FlatIndex => "flat-index-v2",
Self::Git => "git-v0",
Self::Interpreter => "interpreter-v4",
// Note that when bumping this, you'll also need to bump it
// in `crates/uv/tests/it/cache_clean.rs`.
Self::Simple => "simple-v15",
// Note that when bumping this, you'll also need to bump it
// in `crates/uv/tests/it/cache_prune.rs`.
Self::Wheels => "wheels-v5",
// Note that when bumping this, you'll also need to bump
// `ARCHIVE_VERSION` in `crates/uv-cache/src/lib.rs`.
Self::Archive => "archive-v0",
Self::Builds => "builds-v0",
Self::Environments => "environments-v2",
}
}
/// Remove a package from the cache bucket.
///
/// Returns the number of entries removed from the cache.
fn remove(self, cache: &Cache, name: &PackageName) -> Result<Removal, io::Error> {
/// Returns `true` if the [`Path`] represents a built wheel for the given package.
fn is_match(path: &Path, name: &PackageName) -> bool {
let Ok(metadata) = fs_err::read(path.join("metadata.msgpack")) else {
return false;
};
let Ok(metadata) = rmp_serde::from_slice::<ResolutionMetadata>(&metadata) else {
return false;
};
metadata.name == *name
}
let mut summary = Removal::default();
match self {
Self::Wheels => {
// For `pypi` wheels, we expect a directory per package (indexed by name).
let root = cache.bucket(self).join(WheelCacheKind::Pypi);
summary += rm_rf(root.join(name.to_string()))?;
// For alternate indices, we expect a directory for every index (under an `index`
// subdirectory), followed by a directory per package (indexed by name).
let root = cache.bucket(self).join(WheelCacheKind::Index);
for directory in directories(root)? {
summary += rm_rf(directory.join(name.to_string()))?;
}
// For direct URLs, we expect a directory for every URL, followed by a
// directory per package (indexed by name).
let root = cache.bucket(self).join(WheelCacheKind::Url);
for directory in directories(root)? {
summary += rm_rf(directory.join(name.to_string()))?;
}
}
Self::SourceDistributions => {
// For `pypi` source distributions, we expect a directory per package (indexed by name).
let root = cache.bucket(self).join(WheelCacheKind::Pypi);
summary += rm_rf(root.join(name.to_string()))?;
// For alternate indices, we expect a directory for every index (under an `index`
// subdirectory), followed by a directory per package (indexed by name).
let root = cache.bucket(self).join(WheelCacheKind::Index);
for directory in directories(root)? {
summary += rm_rf(directory.join(name.to_string()))?;
}
// For direct URLs, we expect a directory for every URL, followed by a
// directory per version. To determine whether the URL is relevant, we need to
// search for a wheel matching the package name.
let root = cache.bucket(self).join(WheelCacheKind::Url);
for url in directories(root)? {
if directories(&url)?.any(|version| is_match(&version, name)) {
summary += rm_rf(url)?;
}
}
// For local dependencies, we expect a directory for every path, followed by a
// directory per version. To determine whether the path is relevant, we need to
// search for a wheel matching the package name.
let root = cache.bucket(self).join(WheelCacheKind::Path);
for path in directories(root)? {
if directories(&path)?.any(|version| is_match(&version, name)) {
summary += rm_rf(path)?;
}
}
// For Git dependencies, we expect a directory for every repository, followed by a
// directory for every SHA. To determine whether the SHA is relevant, we need to
// search for a wheel matching the package name.
let root = cache.bucket(self).join(WheelCacheKind::Git);
for repository in directories(root)? {
for sha in directories(repository)? {
if is_match(&sha, name) {
summary += rm_rf(sha)?;
}
}
}
}
Self::Simple => {
// For the `pypi` index, we expect an `.rkyv` file per package, indexed by name.
let root = cache.bucket(self).join(WheelCacheKind::Pypi);
summary += rm_rf(root.join(format!("{name}.rkyv")))?;
// For alternate indices, we expect a directory for every index (under an `index`
// subdirectory), followed by a directory per package (indexed by name).
let root = cache.bucket(self).join(WheelCacheKind::Index);
for directory in directories(root)? {
summary += rm_rf(directory.join(format!("{name}.rkyv")))?;
}
}
Self::FlatIndex => {
// We can't know if the flat index includes a package, so we just remove the entire
// cache entry.
let root = cache.bucket(self);
summary += rm_rf(root)?;
}
Self::Git | Self::Interpreter | Self::Archive | Self::Builds | Self::Environments => {
// Nothing to do.
}
}
Ok(summary)
}
/// Return an iterator over all cache buckets.
pub fn iter() -> impl Iterator<Item = Self> {
[
Self::Wheels,
Self::SourceDistributions,
Self::FlatIndex,
Self::Git,
Self::Interpreter,
Self::Simple,
Self::Archive,
Self::Builds,
Self::Environments,
]
.iter()
.copied()
}
}
impl Display for CacheBucket {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(self.to_str())
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Freshness {
/// The cache entry is fresh according to the [`Refresh`] policy.
Fresh,
/// The cache entry is stale according to the [`Refresh`] policy.
Stale,
/// The cache entry does not exist.
Missing,
}
impl Freshness {
pub const fn is_fresh(self) -> bool {
matches!(self, Self::Fresh)
}
pub const fn is_stale(self) -> bool {
matches!(self, Self::Stale)
}
}
/// A refresh policy for cache entries.
#[derive(Debug, Clone)]
pub enum Refresh {
/// Don't refresh any entries.
None(Timestamp),
/// Refresh entries linked to the given packages, if created before the given timestamp.
Packages(Vec<PackageName>, Vec<Box<Path>>, Timestamp),
/// Refresh all entries created before the given timestamp.
All(Timestamp),
}
impl Refresh {
/// Determine the refresh strategy to use based on the command-line arguments.
pub fn from_args(refresh: Option<bool>, refresh_package: Vec<PackageName>) -> Self {
let timestamp = Timestamp::now();
match refresh {
Some(true) => Self::All(timestamp),
Some(false) => Self::None(timestamp),
None => {
if refresh_package.is_empty() {
Self::None(timestamp)
} else {
Self::Packages(refresh_package, vec![], timestamp)
}
}
}
}
/// Return the [`Timestamp`] associated with the refresh policy.
pub fn timestamp(&self) -> Timestamp {
match self {
Self::None(timestamp) => *timestamp,
Self::Packages(.., timestamp) => *timestamp,
Self::All(timestamp) => *timestamp,
}
}
/// Returns `true` if no packages should be reinstalled.
pub fn is_none(&self) -> bool {
matches!(self, Self::None(_))
}
/// Combine two [`Refresh`] policies, taking the "max" of the two policies.
#[must_use]
pub fn combine(self, other: Refresh) -> Self {
/// Return the maximum of two timestamps.
fn max(a: Timestamp, b: Timestamp) -> Timestamp {
if a > b {
a
} else {
b
}
}
match (self, other) {
// If the policy is `None`, return the existing refresh policy.
// Take the `max` of the two timestamps.
(Self::None(t1), Refresh::None(t2)) => Refresh::None(max(t1, t2)),
(Self::None(t1), Refresh::All(t2)) => Refresh::All(max(t1, t2)),
(Self::None(t1), Refresh::Packages(packages, paths, t2)) => {
Refresh::Packages(packages, paths, max(t1, t2))
}
// If the policy is `All`, refresh all packages.
(Self::All(t1), Refresh::None(t2)) => Refresh::All(max(t1, t2)),
(Self::All(t1), Refresh::All(t2)) => Refresh::All(max(t1, t2)),
(Self::All(t1), Refresh::Packages(.., t2)) => Refresh::All(max(t1, t2)),
// If the policy is `Packages`, take the "max" of the two policies.
(Self::Packages(packages, paths, t1), Refresh::None(t2)) => {
Refresh::Packages(packages, paths, max(t1, t2))
}
(Self::Packages(.., t1), Refresh::All(t2)) => Refresh::All(max(t1, t2)),
(Self::Packages(packages1, paths1, t1), Refresh::Packages(packages2, paths2, t2)) => {
Refresh::Packages(
packages1.into_iter().chain(packages2).collect(),
paths1.into_iter().chain(paths2).collect(),
max(t1, t2),
)
}
}
}
}
#[cfg(test)]
mod tests {
use std::str::FromStr;
use crate::ArchiveId;
use super::Link;
#[test]
fn test_link_round_trip() {
let id = ArchiveId::new();
let link = Link::new(id);
let s = link.to_string();
let parsed = Link::from_str(&s).unwrap();
assert_eq!(link.id, parsed.id);
assert_eq!(link.version, parsed.version);
}
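    // A minimal sketch of how `Cache::freshness` interacts with the `Refresh` policy,
    // using a nonexistent entry in a temporary cache: under `Refresh::None` the entry is
    // reported as fresh without consulting the filesystem, while `Refresh::All` reports
    // it as missing.
    #[test]
    fn test_freshness_missing_entry() -> std::io::Result<()> {
        use uv_cache_info::Timestamp;

        use crate::{Cache, CacheBucket, Freshness, Refresh};

        let cache = Cache::temp()?.init()?;
        let entry = cache.entry(CacheBucket::Simple, "pypi", "does-not-exist.rkyv");

        // `Refresh::None` never requires revalidation, so even a missing entry is fresh.
        let cache = cache.with_refresh(Refresh::None(Timestamp::now()));
        assert_eq!(cache.freshness(&entry, None, None)?, Freshness::Fresh);

        // `Refresh::All` requires revalidation, and the entry does not exist on disk.
        let cache = cache.with_refresh(Refresh::All(Timestamp::now()));
        assert_eq!(cache.freshness(&entry, None, None)?, Freshness::Missing);

        Ok(())
    }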
#[test]
fn test_link_deserialize() {
assert!(Link::from_str("archive-v0/foo").is_ok());
assert!(Link::from_str("archive/foo").is_err());
assert!(Link::from_str("v1/foo").is_err());
assert!(Link::from_str("archive-v0/").is_err());
}
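    // A small sketch of the `Refresh::combine` semantics: `All` dominates any other
    // policy, and the combined policy carries the later ("max") of the two timestamps.
    #[test]
    fn test_refresh_combine() {
        use uv_cache_info::Timestamp;

        use crate::Refresh;

        let earlier = Timestamp::now();
        let later = Timestamp::now();

        // `None` combined with `All` yields `All`.
        let combined = Refresh::None(earlier).combine(Refresh::All(later));
        assert!(matches!(combined, Refresh::All(_)));

        // Combining two `Packages` policies merges them and keeps the later timestamp.
        let combined = Refresh::Packages(vec![], vec![], earlier)
            .combine(Refresh::Packages(vec![], vec![], later));
        assert!(matches!(combined, Refresh::Packages(_, _, timestamp) if timestamp >= earlier));
    }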
}