Support globs as cache keys in tool.uv.cache-keys (#7268)

## Summary

This has been asked for a few times. There are risks that these checks
could be slow, but they're buyer-beware.

Closes https://github.com/astral-sh/uv/issues/7246.
This commit is contained in:
Charlie Marsh 2024-09-11 15:30:59 -04:00 committed by GitHub
parent d7ec546e71
commit 58a157a0ad
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 171 additions and 21 deletions

1
Cargo.lock generated
View file

@ -4610,6 +4610,7 @@ name = "uv-cache-info"
version = "0.0.1"
dependencies = [
"fs-err",
"glob",
"schemars",
"serde",
"thiserror",

View file

@ -14,6 +14,7 @@ workspace = true
[dependencies]
fs-err = { workspace = true }
glob = { workspace = true }
schemars = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"] }
thiserror = { workspace = true }

View file

@ -1,11 +1,12 @@
use crate::commit_info::CacheCommit;
use crate::timestamp::Timestamp;
use glob::MatchOptions;
use serde::Deserialize;
use std::cmp::max;
use std::io;
use std::path::{Path, PathBuf};
use tracing::debug;
use tracing::{debug, warn};
/// The information used to determine whether a built distribution is up-to-date, based on the
/// timestamps of relevant files, the current commit of a repository, etc.
@ -64,9 +65,9 @@ impl CacheInfo {
// If no cache keys were defined, use the defaults.
let cache_keys = cache_keys.unwrap_or_else(|| {
vec![
CacheKey::Path(directory.join("pyproject.toml")),
CacheKey::Path(directory.join("setup.py")),
CacheKey::Path(directory.join("setup.cfg")),
CacheKey::Path("pyproject.toml".to_string()),
CacheKey::Path("setup.py".to_string()),
CacheKey::Path("setup.cfg".to_string()),
]
});
@ -74,14 +75,71 @@ impl CacheInfo {
for cache_key in &cache_keys {
match cache_key {
CacheKey::Path(file) | CacheKey::File { file } => {
timestamp = max(
timestamp,
file.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
.as_ref()
.map(Timestamp::from_metadata),
);
if file.chars().any(|c| matches!(c, '*' | '?' | '[')) {
// Treat the path as a glob.
let path = directory.join(file);
let Some(pattern) = path.to_str() else {
warn!("Failed to convert pattern to string: {}", path.display());
continue;
};
let paths = match glob::glob_with(
pattern,
MatchOptions {
case_sensitive: true,
require_literal_separator: true,
require_literal_leading_dot: false,
},
) {
Ok(paths) => paths,
Err(err) => {
warn!("Failed to parse glob pattern: {err}");
continue;
}
};
for entry in paths {
let entry = match entry {
Ok(entry) => entry,
Err(err) => {
warn!("Failed to read glob entry: {err}");
continue;
}
};
let metadata = match entry.metadata() {
Ok(metadata) => metadata,
Err(err) => {
warn!("Failed to read metadata for glob entry: {err}");
continue;
}
};
if metadata.is_file() {
timestamp =
max(timestamp, Some(Timestamp::from_metadata(&metadata)));
} else {
warn!(
"Expected file for cache key, but found directory: `{}`",
entry.display()
);
}
}
} else {
// Treat the path as a file.
let path = directory.join(file);
let metadata = match path.metadata() {
Ok(metadata) => metadata,
Err(err) => {
warn!("Failed to read metadata for file: {err}");
continue;
}
};
if metadata.is_file() {
timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
} else {
warn!(
"Expected file for cache key, but found directory: `{}`",
path.display()
);
}
}
}
CacheKey::Git { git: true } => match CacheCommit::from_repository(directory) {
Ok(commit_info) => commit = Some(commit_info),
@ -165,10 +223,15 @@ struct ToolUv {
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
pub enum CacheKey {
/// Ex) `"Cargo.lock"`
Path(PathBuf),
/// Ex) `{ file = "Cargo.lock" }`
File { file: PathBuf },
/// Ex) `"Cargo.lock"` or `"**/*.toml"`
Path(String),
/// Ex) `{ file = "Cargo.lock" }` or `{ file = "**/*.toml" }`
File { file: String },
/// Ex) `{ git = true }`
Git { git: bool },
}
pub enum FilePattern {
Glob(String),
Path(PathBuf),
}

View file

@ -58,13 +58,20 @@ pub struct Options {
/// to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in
/// addition to watching the `pyproject.toml`).
///
/// Globs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html)
/// crate. For example, to invalidate the cache whenever a `.toml` file in the project directory
/// or any of its subdirectories is modified, you can specify `cache-keys = [{ file = "**/*.toml" }]`.
/// Note that the use of globs can be expensive, as uv may need to walk the filesystem to
/// determine whether any files have changed.
///
/// Cache keys can also include version control information. For example, if a project uses
/// `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = "pyproject.toml" }]`
/// to include the current Git commit hash in the cache key (in addition to the
/// `pyproject.toml`).
///
/// Cache keys only affect the project defined by the `pyproject.toml` in which they're
/// specified (as opposed to, e.g., affecting all members in a workspace).
/// specified (as opposed to, e.g., affecting all members in a workspace), and all paths and
/// globs are interpreted as relative to the project directory.
#[option(
default = r#"[{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }]"#,
value_type = "list[dict]",

View file

@ -3265,6 +3265,62 @@ fn invalidate_path_on_cache_key() -> Result<()> {
"###
);
// Modify the `pyproject.toml` to use a glob.
pyproject_toml.write_str(
r#"[project]
name = "example"
version = "0.0.0"
dependencies = ["anyio==4.0.0"]
requires-python = ">=3.8"
[tool.uv]
cache-keys = [{ file = "**/*.txt" }]
"#,
)?;
// Write a new file.
editable_dir
.child("resources")
.child("data.txt")
.write_str("data")?;
// Installing again should update the package.
uv_snapshot!(context.filters(), context.pip_install()
.arg("example @ .")
.current_dir(editable_dir.path()), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 4 packages in [TIME]
Prepared 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
~ example==0.0.0 (from file://[TEMP_DIR]/editable)
"###
);
// Write a new file in the current directory.
editable_dir.child("data.txt").write_str("data")?;
// Installing again should update the package.
uv_snapshot!(context.filters(), context.pip_install()
.arg("example @ .")
.current_dir(editable_dir.path()), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 4 packages in [TIME]
Prepared 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
~ example==0.0.0 (from file://[TEMP_DIR]/editable)
"###
);
Ok(())
}

View file

@ -52,6 +52,21 @@ the following to the project's `pyproject.toml`:
cache-keys = [{ file = "requirements.txt" }]
```
Globs are supported, following the syntax of the
[`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) crate. For example, to invalidate the
cache whenever a `.toml` file in the project directory or any of its subdirectories is modified, use
the following:
```toml title="pyproject.toml"
[tool.uv]
cache-keys = [{ file = "**/*.toml" }]
```
!!! note
The use of globs can be expensive, as uv may need to walk the filesystem to determine whether any files have changed.
This may, in turn, requiring traversal of large or deeply nested directories.
As an escape hatch, if a project uses `dynamic` metadata that isn't covered by `tool.uv.cache-keys`,
you can instruct uv to _always_ rebuild and reinstall it by adding the project to the
`tool.uv.reinstall-package` list:

View file

@ -76,13 +76,20 @@ As an example: if a project uses dynamic metadata to read its dependencies from
to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in
addition to watching the `pyproject.toml`).
Globs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html)
crate. For example, to invalidate the cache whenever a `.toml` file in the project directory
or any of its subdirectories is modified, you can specify `cache-keys = [{ file = "**/*.toml" }]`.
Note that the use of globs can be expensive, as uv may need to walk the filesystem to
determine whether any files have changed.
Cache keys can also include version control information. For example, if a project uses
`setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = "pyproject.toml" }]`
to include the current Git commit hash in the cache key (in addition to the
`pyproject.toml`).
Cache keys only affect the project defined by the `pyproject.toml` in which they're
specified (as opposed to, e.g., affecting all members in a workspace).
specified (as opposed to, e.g., affecting all members in a workspace), and all paths and
globs are interpreted as relative to the project directory.
**Default value**: `[{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }]`

6
uv.schema.json generated
View file

@ -22,7 +22,7 @@
]
},
"cache-keys": {
"description": "The keys to consider when caching builds for the project.\n\nCache keys enable you to specify the files or directories that should trigger a rebuild when modified. By default, uv will rebuild a project whenever the `pyproject.toml`, `setup.py`, or `setup.cfg` files in the project directory are modified, i.e.:\n\n```toml cache-keys = [{ file = \"pyproject.toml\" }, { file = \"setup.py\" }, { file = \"setup.cfg\" }] ```\n\nAs an example: if a project uses dynamic metadata to read its dependencies from a `requirements.txt` file, you can specify `cache-keys = [{ file = \"requirements.txt\" }, { file = \"pyproject.toml\" }]` to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in addition to watching the `pyproject.toml`).\n\nCache keys can also include version control information. For example, if a project uses `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = \"pyproject.toml\" }]` to include the current Git commit hash in the cache key (in addition to the `pyproject.toml`).\n\nCache keys only affect the project defined by the `pyproject.toml` in which they're specified (as opposed to, e.g., affecting all members in a workspace).",
"description": "The keys to consider when caching builds for the project.\n\nCache keys enable you to specify the files or directories that should trigger a rebuild when modified. By default, uv will rebuild a project whenever the `pyproject.toml`, `setup.py`, or `setup.cfg` files in the project directory are modified, i.e.:\n\n```toml cache-keys = [{ file = \"pyproject.toml\" }, { file = \"setup.py\" }, { file = \"setup.cfg\" }] ```\n\nAs an example: if a project uses dynamic metadata to read its dependencies from a `requirements.txt` file, you can specify `cache-keys = [{ file = \"requirements.txt\" }, { file = \"pyproject.toml\" }]` to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in addition to watching the `pyproject.toml`).\n\nGlobs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) crate. For example, to invalidate the cache whenever a `.toml` file in the project directory or any of its subdirectories is modified, you can specify `cache-keys = [{ file = \"**/*.toml\" }]`. Note that the use of globs can be expensive, as uv may need to walk the filesystem to determine whether any files have changed.\n\nCache keys can also include version control information. For example, if a project uses `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = \"pyproject.toml\" }]` to include the current Git commit hash in the cache key (in addition to the `pyproject.toml`).\n\nCache keys only affect the project defined by the `pyproject.toml` in which they're specified (as opposed to, e.g., affecting all members in a workspace), and all paths and globs are interpreted as relative to the project directory.",
"writeOnly": true,
"type": [
"array",
@ -432,11 +432,11 @@
"CacheKey": {
"anyOf": [
{
"description": "Ex) `\"Cargo.lock\"`",
"description": "Ex) `\"Cargo.lock\"` or `\"**/*.toml\"`",
"type": "string"
},
{
"description": "Ex) `{ file = \"Cargo.lock\" }`",
"description": "Ex) `{ file = \"Cargo.lock\" }` or `{ file = \"**/*.toml\" }`",
"type": "object",
"required": [
"file"