// Mirror of https://github.com/astral-sh/uv.git, synced 2025-07-07 21:35:00 +00:00.
use std::borrow::Cow;
use std::cmp::max;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};

use serde::Deserialize;
use tracing::{debug, warn};

use crate::git_info::{Commit, Tags};
use crate::timestamp::Timestamp;

#[derive(Debug, thiserror::Error)]
pub enum CacheInfoError {
    #[error("Failed to parse glob patterns for `cache-keys`: {0}")]
    Glob(#[from] globwalk::GlobError),
    #[error(transparent)]
    Io(#[from] std::io::Error),
}

/// The information used to determine whether a built distribution is up-to-date, based on the
/// timestamps of relevant files, the current commit of a repository, etc.
#[derive(Default, Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct CacheInfo {
    /// The timestamp of the most recent `ctime` of any relevant files, at the time of the build.
    /// The timestamp will typically be the maximum of the `ctime` values of the `pyproject.toml`,
    /// `setup.py`, and `setup.cfg` files, if they exist; however, users can provide additional
    /// files to timestamp via the `cache-keys` field.
    timestamp: Option<Timestamp>,
    /// The commit at which the distribution was built.
    commit: Option<Commit>,
    /// The Git tags present at the time of the build.
    tags: Option<Tags>,
    /// Environment variables to include in the cache key.
    #[serde(default)]
    env: BTreeMap<String, Option<String>>,
    /// The timestamp or inode of any directories that should be considered in the cache key.
    #[serde(default)]
    directories: BTreeMap<Cow<'static, str>, Option<DirectoryTimestamp>>,
}
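// Illustrative only (added alongside this mirror, not part of the upstream source): the cache
// keys that drive the fields above are typically read from a `[tool.uv] cache-keys` entry in
// `pyproject.toml`. The entries below mirror the defaults used in `from_directory` and the
// forms documented on `CacheKey`:
//
//     [tool.uv]
//     cache-keys = [
//         "pyproject.toml",
//         "setup.py",
//         "setup.cfg",
//         { dir = "src" },
//         { git = { commit = true, tags = false } },
//         { env = "UV_CACHE_INFO" },
//     ]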
impl CacheInfo {
    /// Return the [`CacheInfo`] for a given timestamp.
    pub fn from_timestamp(timestamp: Timestamp) -> Self {
        Self {
            timestamp: Some(timestamp),
            ..Self::default()
        }
    }

    /// Compute the cache info for a given path, which may be a file or a directory.
    pub fn from_path(path: &Path) -> Result<Self, CacheInfoError> {
        let metadata = fs_err::metadata(path)?;
        if metadata.is_file() {
            Ok(Self::from_file(path)?)
        } else {
            Self::from_directory(path)
        }
    }
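    // Illustrative usage (a sketch added alongside this mirror, not part of the upstream
    // source; `./project` is a placeholder path):
    //
    //     let info = CacheInfo::from_path(Path::new("./project"))?;
    //     if info.is_empty() {
    //         // Nothing relevant was found to key the cache on.
    //     }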
    /// Compute the cache info for a given directory.
    pub fn from_directory(directory: &Path) -> Result<Self, CacheInfoError> {
        let mut commit = None;
        let mut tags = None;
        let mut timestamp = None;
        let mut directories = BTreeMap::new();
        let mut env = BTreeMap::new();

        // Read the cache keys.
        let cache_keys =
            if let Ok(contents) = fs_err::read_to_string(directory.join("pyproject.toml")) {
                if let Ok(pyproject_toml) = toml::from_str::<PyProjectToml>(&contents) {
                    pyproject_toml
                        .tool
                        .and_then(|tool| tool.uv)
                        .and_then(|tool_uv| tool_uv.cache_keys)
                } else {
                    None
                }
            } else {
                None
            };

        // If no cache keys were defined, use the defaults.
        let cache_keys = cache_keys.unwrap_or_else(|| {
            vec![
                CacheKey::Path(Cow::Borrowed("pyproject.toml")),
                CacheKey::Path(Cow::Borrowed("setup.py")),
                CacheKey::Path(Cow::Borrowed("setup.cfg")),
                CacheKey::Directory {
                    dir: Cow::Borrowed("src"),
                },
            ]
        });

        // Incorporate timestamps from any direct filepaths.
        let mut globs = vec![];
        for cache_key in cache_keys {
            match cache_key {
                CacheKey::Path(file) | CacheKey::File { file } => {
                    if file
                        .as_ref()
                        .chars()
                        .any(|c| matches!(c, '*' | '?' | '[' | '{'))
                    {
                        // Defer globs to a separate pass.
                        globs.push(file);
                        continue;
                    }

                    // Treat the path as a file.
                    let path = directory.join(file.as_ref());
                    let metadata = match path.metadata() {
                        Ok(metadata) => metadata,
                        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                            continue;
                        }
                        Err(err) => {
                            warn!("Failed to read metadata for file: {err}");
                            continue;
                        }
                    };
                    if !metadata.is_file() {
                        warn!(
                            "Expected file for cache key, but found directory: `{}`",
                            path.display()
                        );
                        continue;
                    }
                    timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
                }
                CacheKey::Directory { dir } => {
                    // Treat the path as a directory.
                    let path = directory.join(dir.as_ref());
                    let metadata = match path.metadata() {
                        Ok(metadata) => metadata,
                        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                            directories.insert(dir, None);
                            continue;
                        }
                        Err(err) => {
                            warn!("Failed to read metadata for directory: {err}");
                            continue;
                        }
                    };
                    if !metadata.is_dir() {
                        warn!(
                            "Expected directory for cache key, but found file: `{}`",
                            path.display()
                        );
                        continue;
                    }

                    if let Ok(created) = metadata.created() {
                        // Prefer the creation time.
                        directories.insert(
                            dir,
                            Some(DirectoryTimestamp::Timestamp(Timestamp::from(created))),
                        );
                    } else {
                        // Fall back to the inode.
                        #[cfg(unix)]
                        {
                            use std::os::unix::fs::MetadataExt;
                            directories
                                .insert(dir, Some(DirectoryTimestamp::Inode(metadata.ino())));
                        }
                        #[cfg(not(unix))]
                        {
                            warn!(
                                "Failed to read creation time for directory: `{}`",
                                path.display()
                            );
                        }
                    }
                }
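                // Note (added commentary, not in the upstream source): `{ git = true }`
                // records only the current commit; tags are captured only when requested
                // explicitly via the `{ git = { tags = true } }` form handled below.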
                CacheKey::Git {
                    git: GitPattern::Bool(true),
                } => match Commit::from_repository(directory) {
                    Ok(commit_info) => commit = Some(commit_info),
                    Err(err) => {
                        debug!("Failed to read the current commit: {err}");
                    }
                },
                CacheKey::Git {
                    git: GitPattern::Set(set),
                } => {
                    if set.commit.unwrap_or(false) {
                        match Commit::from_repository(directory) {
                            Ok(commit_info) => commit = Some(commit_info),
                            Err(err) => {
                                debug!("Failed to read the current commit: {err}");
                            }
                        }
                    }
                    if set.tags.unwrap_or(false) {
                        match Tags::from_repository(directory) {
                            Ok(tags_info) => tags = Some(tags_info),
                            Err(err) => {
                                debug!("Failed to read the current tags: {err}");
                            }
                        }
                    }
                }
                CacheKey::Git {
                    git: GitPattern::Bool(false),
                } => {}
                CacheKey::Environment { env: var } => {
                    let value = std::env::var(&var).ok();
                    env.insert(var, value);
                }
            }
        }

        // If we have any globs, process them in a single pass.
        if !globs.is_empty() {
            let walker = globwalk::GlobWalkerBuilder::from_patterns(directory, &globs)
                .follow_links(true)
                .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK)
                .build()?;
            for entry in walker {
                let entry = match entry {
                    Ok(entry) => entry,
                    Err(err) => {
                        warn!("Failed to read glob entry: {err}");
                        continue;
                    }
                };
                let metadata = match entry.metadata() {
                    Ok(metadata) => metadata,
                    Err(err) => {
                        warn!("Failed to read metadata for glob entry: {err}");
                        continue;
                    }
                };
                if !metadata.is_file() {
                    warn!(
                        "Expected file for cache key, but found directory: `{}`",
                        entry.path().display()
                    );
                    continue;
                }
                timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
            }
        }

        debug!(
            "Computed cache info: {timestamp:?}, {commit:?}, {tags:?}, {env:?}, {directories:?}"
        );

        Ok(Self {
            timestamp,
            commit,
            tags,
            env,
            directories,
        })
    }

    /// Compute the cache info for a given file, assumed to be a binary or source distribution
    /// represented as (e.g.) a `.whl` or `.tar.gz` archive.
    pub fn from_file(path: impl AsRef<Path>) -> std::io::Result<Self> {
        let metadata = fs_err::metadata(path.as_ref())?;
        let timestamp = Timestamp::from_metadata(&metadata);
        Ok(Self {
            timestamp: Some(timestamp),
            ..Self::default()
        })
    }

    /// Returns `true` if the cache info is empty.
    pub fn is_empty(&self) -> bool {
        self.timestamp.is_none()
            && self.commit.is_none()
            && self.tags.is_none()
            && self.env.is_empty()
            && self.directories.is_empty()
    }
}

/// A `pyproject.toml` with an (optional) `[tool.uv]` section.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct PyProjectToml {
    tool: Option<Tool>,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct Tool {
    uv: Option<ToolUv>,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
struct ToolUv {
    cache_keys: Option<Vec<CacheKey>>,
}

#[derive(Debug, Clone, serde::Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
pub enum CacheKey {
    /// Ex) `"Cargo.lock"` or `"**/*.toml"`
    Path(Cow<'static, str>),
    /// Ex) `{ file = "Cargo.lock" }` or `{ file = "**/*.toml" }`
    File { file: Cow<'static, str> },
    /// Ex) `{ dir = "src" }`
    Directory { dir: Cow<'static, str> },
    /// Ex) `{ git = true }` or `{ git = { commit = true, tags = false } }`
    Git { git: GitPattern },
    /// Ex) `{ env = "UV_CACHE_INFO" }`
    Environment { env: String },
}

#[derive(Debug, Clone, serde::Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
pub enum GitPattern {
    Bool(bool),
    Set(GitSet),
}

#[derive(Debug, Clone, serde::Deserialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(rename_all = "kebab-case", deny_unknown_fields)]
pub struct GitSet {
    commit: Option<bool>,
    tags: Option<bool>,
}

pub enum FilePattern {
    Glob(String),
    Path(PathBuf),
}

/// A timestamp used to measure changes to a directory.
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
enum DirectoryTimestamp {
    Timestamp(Timestamp),
    Inode(u64),
}
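// The tests below are an illustrative sketch added alongside this mirror (not part of the
// upstream file). They exercise only items defined or used above: `CacheInfo::from_timestamp`,
// `CacheInfo::is_empty`, the `Timestamp: From<SystemTime>` conversion used in `from_directory`,
// and the same `toml::from_str::<PyProjectToml>` path that `from_directory` uses.
#[cfg(test)]
mod illustrative_tests {
    use super::*;

    #[test]
    fn from_timestamp_is_not_empty() {
        // `Timestamp::from(SystemTime)` is assumed here because `from_directory` converts
        // `metadata.created()` the same way.
        let timestamp = Timestamp::from(std::time::SystemTime::UNIX_EPOCH);
        let info = CacheInfo::from_timestamp(timestamp);
        assert!(!info.is_empty());
        assert!(CacheInfo::default().is_empty());
    }

    #[test]
    fn documented_cache_key_forms_deserialize() {
        // Each entry matches one of the `Ex)` forms documented on `CacheKey`.
        let contents = r#"
            [tool.uv]
            cache-keys = [
                "Cargo.lock",
                { file = "**/*.toml" },
                { dir = "src" },
                { git = true },
                { git = { commit = true, tags = false } },
                { env = "UV_CACHE_INFO" },
            ]
        "#;
        let pyproject: PyProjectToml = toml::from_str(contents).expect("examples should parse");
        let cache_keys = pyproject
            .tool
            .and_then(|tool| tool.uv)
            .and_then(|tool_uv| tool_uv.cache_keys)
            .expect("`cache-keys` should be present");
        assert_eq!(cache_keys.len(), 6);
        assert!(matches!(cache_keys[0], CacheKey::Path(_)));
        assert!(matches!(cache_keys[5], CacheKey::Environment { .. }));
    }
}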