mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 02:48:17 +00:00
Migrate interpreter query to custom caching (#508)
This removes the last usage of cacache by replacing it with a custom,
flat json caching keyed by the digest of the executable path.

A step towards #478. I've made `CachedByTimestamp<T>` generic over `T`
but intentionally not moved it to `puffin-cache` yet.
This commit is contained in:
parent
5435d44756
commit
d89fbeb642
13 changed files with 88 additions and 156 deletions
100
Cargo.lock
generated
100
Cargo.lock
generated
|
@ -391,31 +391,6 @@ version = "1.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc"
|
||||
|
||||
[[package]]
|
||||
name = "cacache"
|
||||
version = "12.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "142316461ed3a3dfcba10417317472da5bfd0461e4d276bf7c07b330766d9490"
|
||||
dependencies = [
|
||||
"digest",
|
||||
"either",
|
||||
"futures",
|
||||
"hex",
|
||||
"miette 5.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reflink-copy",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"sha1",
|
||||
"sha2",
|
||||
"ssri",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "camino"
|
||||
version = "1.1.6"
|
||||
|
@ -1188,6 +1163,7 @@ dependencies = [
|
|||
"clap",
|
||||
"fs-err",
|
||||
"platform-host",
|
||||
"puffin-cache",
|
||||
"puffin-interpreter",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
@ -1753,18 +1729,6 @@ dependencies = [
|
|||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miette"
|
||||
version = "5.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59bb584eaeeab6bd0226ccf3509a69d7936d148cf3d036ad350abe35e8c6856e"
|
||||
dependencies = [
|
||||
"miette-derive 5.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"once_cell",
|
||||
"thiserror",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miette"
|
||||
version = "5.10.0"
|
||||
|
@ -1773,7 +1737,7 @@ dependencies = [
|
|||
"backtrace",
|
||||
"backtrace-ext",
|
||||
"is-terminal",
|
||||
"miette-derive 5.10.0 (git+https://github.com/zkat/miette.git?rev=fd77257cee0f5d03aa7dccb4ba8cbaa40c1a88c6)",
|
||||
"miette-derive",
|
||||
"once_cell",
|
||||
"owo-colors",
|
||||
"supports-color",
|
||||
|
@ -1785,17 +1749,6 @@ dependencies = [
|
|||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miette-derive"
|
||||
version = "5.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miette-derive"
|
||||
version = "5.10.0"
|
||||
|
@ -2338,7 +2291,6 @@ dependencies = [
|
|||
"assert_cmd",
|
||||
"assert_fs",
|
||||
"bitflags 2.4.1",
|
||||
"cacache",
|
||||
"chrono",
|
||||
"clap",
|
||||
"colored",
|
||||
|
@ -2351,7 +2303,7 @@ dependencies = [
|
|||
"insta-cmd",
|
||||
"install-wheel-rs",
|
||||
"itertools 0.11.0",
|
||||
"miette 5.10.0 (git+https://github.com/zkat/miette.git?rev=fd77257cee0f5d03aa7dccb4ba8cbaa40c1a88c6)",
|
||||
"miette",
|
||||
"mimalloc",
|
||||
"pep440_rs 0.3.12",
|
||||
"pep508_rs",
|
||||
|
@ -2567,7 +2519,6 @@ dependencies = [
|
|||
name = "puffin-interpreter"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"cacache",
|
||||
"fs-err",
|
||||
"pep440_rs 0.3.12",
|
||||
"pep508_rs",
|
||||
|
@ -3262,28 +3213,6 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha-1"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.8"
|
||||
|
@ -3363,23 +3292,6 @@ version = "0.9.8"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
||||
|
||||
[[package]]
|
||||
name = "ssri"
|
||||
version = "9.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da7a2b3c2bc9693bcb40870c4e9b5bf0d79f9cb46273321bf855ec513e919082"
|
||||
dependencies = [
|
||||
"base64 0.21.5",
|
||||
"digest",
|
||||
"hex",
|
||||
"miette 5.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde",
|
||||
"sha-1",
|
||||
"sha2",
|
||||
"thiserror",
|
||||
"xxhash-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stacker"
|
||||
version = "0.1.15"
|
||||
|
@ -4440,12 +4352,6 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xxhash-rust"
|
||||
version = "0.8.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9828b178da53440fa9c766a3d2f73f7cf5d0ac1fe3980c1e5018d899fd19e07b"
|
||||
|
||||
[[package]]
|
||||
name = "yaml-rust"
|
||||
version = "0.4.5"
|
||||
|
|
|
@ -18,7 +18,6 @@ async_http_range_reader = { git = "https://github.com/baszalmstra/async_http_ran
|
|||
async_zip = { version = "0.0.15", features = ["tokio", "deflate"] }
|
||||
bitflags = { version = "2.4.1" }
|
||||
bytesize = { version = "1.3.0" }
|
||||
cacache = { version = "12.0.0", default-features = false, features = ["tokio-runtime"] }
|
||||
camino = { version = "1.1.6", features = ["serde1"] }
|
||||
cargo-util = { version = "0.2.6" }
|
||||
chrono = { version = "0.4.31" }
|
||||
|
|
|
@ -18,6 +18,7 @@ workspace = true
|
|||
|
||||
[dependencies]
|
||||
platform-host = { path = "../platform-host" }
|
||||
puffin-cache = { path = "../puffin-cache" }
|
||||
puffin-interpreter = { path = "../puffin-interpreter" }
|
||||
|
||||
camino = { workspace = true }
|
||||
|
|
|
@ -11,6 +11,7 @@ use tracing_subscriber::{fmt, EnvFilter};
|
|||
|
||||
use gourgeist::{create_bare_venv, parse_python_cli};
|
||||
use platform_host::Platform;
|
||||
use puffin_cache::Cache;
|
||||
use puffin_interpreter::Interpreter;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
|
@ -25,8 +26,8 @@ fn run() -> Result<(), gourgeist::Error> {
|
|||
let location = cli.path.unwrap_or(Utf8PathBuf::from(".venv"));
|
||||
let python = parse_python_cli(cli.python)?;
|
||||
let platform = Platform::current()?;
|
||||
let cache = tempfile::tempdir()?;
|
||||
let info = Interpreter::query(python.as_std_path(), platform, cache.path()).unwrap();
|
||||
let cache = Cache::temp()?;
|
||||
let info = Interpreter::query(python.as_std_path(), platform, &cache).unwrap();
|
||||
create_bare_venv(&location, &info)?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -150,6 +150,43 @@ pub enum CacheBucket {
|
|||
/// Git repositories.
|
||||
Git,
|
||||
/// Information about an interpreter at a path.
|
||||
///
|
||||
/// To avoid caching pyenv shims, bash scripts which may redirect to a new python version
|
||||
/// without the shim itself changing, we only cache when the path equals `sys.executable`, i.e.
|
||||
/// the path we're running is the python executable itself and not a shim.
|
||||
///
|
||||
/// Cache structure: `interpreter-v0/<digest(path)>.json`
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// The contents of each of the json files has a timestamp field in unix time, the [PEP 508]
|
||||
/// markers and some information from the `sys`/`sysconfig` modules.
|
||||
///
|
||||
/// ```json
|
||||
/// {
|
||||
/// "timestamp": 1698047994491,
|
||||
/// "data": {
|
||||
/// "markers": {
|
||||
/// "implementation_name": "cpython",
|
||||
/// "implementation_version": "3.12.0",
|
||||
/// "os_name": "posix",
|
||||
/// "platform_machine": "x86_64",
|
||||
/// "platform_python_implementation": "CPython",
|
||||
/// "platform_release": "6.5.0-13-generic",
|
||||
/// "platform_system": "Linux",
|
||||
/// "platform_version": "#13-Ubuntu SMP PREEMPT_DYNAMIC Fri Nov 3 12:16:05 UTC 2023",
|
||||
/// "python_full_version": "3.12.0",
|
||||
/// "python_version": "3.12",
|
||||
/// "sys_platform": "linux"
|
||||
/// },
|
||||
/// "base_exec_prefix": "/home/ferris/.pyenv/versions/3.12.0",
|
||||
/// "base_prefix": "/home/ferris/.pyenv/versions/3.12.0",
|
||||
/// "sys_executable": "/home/ferris/projects/puffin/.venv/bin/python"
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// [PEP 508]: https://peps.python.org/pep-0508/#environment-markers
|
||||
Interpreter,
|
||||
/// Index responses through the simple metadata API.
|
||||
Simple,
|
||||
|
|
|
@ -39,7 +39,6 @@ requirements-txt = { path = "../requirements-txt" }
|
|||
anstream = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
bitflags = { workspace = true }
|
||||
cacache = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
clap = { workspace = true, features = ["derive"] }
|
||||
colored = { workspace = true }
|
||||
|
|
|
@ -8,7 +8,7 @@ use miette::{Diagnostic, IntoDiagnostic};
|
|||
use thiserror::Error;
|
||||
|
||||
use platform_host::Platform;
|
||||
use puffin_cache::{Cache, CacheBucket};
|
||||
use puffin_cache::Cache;
|
||||
use puffin_interpreter::Interpreter;
|
||||
|
||||
use crate::commands::ExitStatus;
|
||||
|
@ -77,12 +77,8 @@ fn venv_impl(
|
|||
};
|
||||
|
||||
let platform = Platform::current().into_diagnostic()?;
|
||||
let interpreter_info = Interpreter::query(
|
||||
&base_python,
|
||||
platform,
|
||||
&cache.bucket(CacheBucket::Interpreter),
|
||||
)
|
||||
.map_err(VenvError::InterpreterError)?;
|
||||
let interpreter_info =
|
||||
Interpreter::query(&base_python, platform, cache).map_err(VenvError::InterpreterError)?;
|
||||
|
||||
writeln!(
|
||||
printer,
|
||||
|
|
|
@ -32,3 +32,4 @@ url = { workspace = true }
|
|||
|
||||
[dev-dependencies]
|
||||
anyhow = { workspace = true }
|
||||
tokio = { workspace = true, features = ["fs", "macros"] }
|
||||
|
|
|
@ -18,7 +18,6 @@ pep508_rs = { path = "../pep508-rs", features = ["serde"] }
|
|||
platform-host = { path = "../platform-host" }
|
||||
puffin-cache = { path = "../puffin-cache" }
|
||||
|
||||
cacache = { workspace = true }
|
||||
fs-err = { workspace = true, features = ["tokio"] }
|
||||
serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
|
|
|
@ -2,12 +2,14 @@ use std::path::{Path, PathBuf};
|
|||
use std::process::Command;
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
use fs_err as fs;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::debug;
|
||||
|
||||
use pep440_rs::Version;
|
||||
use pep508_rs::MarkerEnvironment;
|
||||
use platform_host::Platform;
|
||||
use puffin_cache::{digest, Cache, CacheBucket};
|
||||
|
||||
use crate::python_platform::PythonPlatform;
|
||||
use crate::Error;
|
||||
|
@ -24,7 +26,7 @@ pub struct Interpreter {
|
|||
|
||||
impl Interpreter {
|
||||
/// Detect the interpreter info for the given Python executable.
|
||||
pub fn query(executable: &Path, platform: Platform, cache: &Path) -> Result<Self, Error> {
|
||||
pub fn query(executable: &Path, platform: Platform, cache: &Cache) -> Result<Self, Error> {
|
||||
let info = InterpreterQueryResult::query_cached(executable, cache)?;
|
||||
debug_assert!(
|
||||
info.base_prefix == info.base_exec_prefix,
|
||||
|
@ -101,7 +103,7 @@ impl Interpreter {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize)]
|
||||
#[derive(Deserialize, Serialize, Clone)]
|
||||
pub(crate) struct InterpreterQueryResult {
|
||||
pub(crate) markers: MarkerEnvironment,
|
||||
pub(crate) base_exec_prefix: PathBuf,
|
||||
|
@ -109,6 +111,12 @@ pub(crate) struct InterpreterQueryResult {
|
|||
pub(crate) sys_executable: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize)]
|
||||
pub(crate) struct CachedByTimestamp<T> {
|
||||
pub(crate) timestamp: u128,
|
||||
pub(crate) data: T,
|
||||
}
|
||||
|
||||
impl InterpreterQueryResult {
|
||||
/// Return the resolved [`InterpreterQueryResult`] for the given Python executable.
|
||||
pub(crate) fn query(interpreter: &Path) -> Result<Self, Error> {
|
||||
|
@ -153,48 +161,43 @@ impl InterpreterQueryResult {
|
|||
/// Running a Python script is (relatively) expensive, and the markers won't change
|
||||
/// unless the Python executable changes, so we use the executable's last modified
|
||||
/// time as a cache key.
|
||||
pub(crate) fn query_cached(executable: &Path, cache: &Path) -> Result<Self, Error> {
|
||||
pub(crate) fn query_cached(executable: &Path, cache: &Cache) -> Result<Self, Error> {
|
||||
let executable_bytes = executable.as_os_str().as_encoded_bytes();
|
||||
let cache_dir = cache.bucket(CacheBucket::Interpreter);
|
||||
let cache_path = cache_dir.join(format!("{}.json", digest(&executable_bytes)));
|
||||
|
||||
// Read from the cache.
|
||||
let key = if let Ok(key) = cache_key(executable) {
|
||||
if let Ok(data) = cacache::read_sync(cache, &key) {
|
||||
if let Ok(info) = serde_json::from_slice::<Self>(&data) {
|
||||
debug!("Using cached markers for {}", executable.display());
|
||||
return Ok(info);
|
||||
}
|
||||
if let Ok(data) = fs::read(&cache_path) {
|
||||
if let Ok(cached) = serde_json::from_slice::<CachedByTimestamp<Self>>(&data) {
|
||||
debug!("Using cached markers for {}", executable.display());
|
||||
return Ok(cached.data);
|
||||
}
|
||||
Some(key)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
}
|
||||
|
||||
// Otherwise, run the Python script.
|
||||
debug!("Detecting markers for {}", executable.display());
|
||||
let info = Self::query(executable)?;
|
||||
|
||||
// If the executable is actually a pyenv shim, a bash script that redirects to the activated
|
||||
// python, we're not allowed to cache the interpreter info
|
||||
let modified = fs_err::metadata(executable)?
|
||||
// Note: This is infallible on windows and unix (i.e. all platforms we support)
|
||||
.modified()?
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map_err(|err| Error::SystemTime(executable.to_path_buf(), err))?;
|
||||
|
||||
// If `executable` is a pyenv shim, a bash script that redirects to the activated
|
||||
// python executable at another path, we're not allowed to cache the interpreter info
|
||||
if executable == info.sys_executable {
|
||||
fs::create_dir_all(cache_dir)?;
|
||||
// Write to the cache.
|
||||
if let Some(key) = key {
|
||||
cacache::write_sync(cache, key, serde_json::to_vec(&info)?)?;
|
||||
}
|
||||
fs::write(
|
||||
cache_path,
|
||||
serde_json::to_vec(&CachedByTimestamp {
|
||||
timestamp: modified.as_millis(),
|
||||
data: info.clone(),
|
||||
})?,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(info)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a cache key for the Python executable, consisting of the executable's
|
||||
/// last modified time and the executable's path.
|
||||
fn cache_key(executable: &Path) -> Result<String, Error> {
|
||||
let modified = fs_err::metadata(executable)?
|
||||
// Note: This is infallible on windows and unix (i.e. all platforms we support)
|
||||
.modified()?
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map_err(|err| Error::SystemTime(executable.to_path_buf(), err))?;
|
||||
Ok(format!(
|
||||
"puffin:v0:{}:{}",
|
||||
executable.display(),
|
||||
modified.as_millis()
|
||||
))
|
||||
}
|
||||
|
|
|
@ -38,7 +38,5 @@ pub enum Error {
|
|||
stderr: String,
|
||||
},
|
||||
#[error("Failed to write to cache")]
|
||||
Cacache(#[from] cacache::Error),
|
||||
#[error("Failed to write to cache")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
|
|||
use tracing::debug;
|
||||
|
||||
use platform_host::Platform;
|
||||
use puffin_cache::{Cache, CacheBucket};
|
||||
use puffin_cache::Cache;
|
||||
|
||||
use crate::python_platform::PythonPlatform;
|
||||
use crate::{Error, Interpreter};
|
||||
|
@ -24,11 +24,7 @@ impl Virtualenv {
|
|||
return Err(Error::NotFound);
|
||||
};
|
||||
let executable = platform.venv_python(&venv);
|
||||
let interpreter = Interpreter::query(
|
||||
&executable,
|
||||
platform.0,
|
||||
&cache.bucket(CacheBucket::Interpreter),
|
||||
)?;
|
||||
let interpreter = Interpreter::query(&executable, platform.0, cache)?;
|
||||
|
||||
Ok(Self {
|
||||
root: venv,
|
||||
|
@ -39,11 +35,7 @@ impl Virtualenv {
|
|||
pub fn from_virtualenv(platform: Platform, root: &Path, cache: &Cache) -> Result<Self, Error> {
|
||||
let platform = PythonPlatform::from(platform);
|
||||
let executable = platform.venv_python(root);
|
||||
let interpreter = Interpreter::query(
|
||||
&executable,
|
||||
platform.0,
|
||||
&cache.bucket(CacheBucket::Interpreter),
|
||||
)?;
|
||||
let interpreter = Interpreter::query(&executable, platform.0, cache)?;
|
||||
|
||||
Ok(Self {
|
||||
root: root.to_path_buf(),
|
||||
|
|
|
@ -49,7 +49,7 @@ serde_json = { workspace = true }
|
|||
sha2 = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tokio = { workspace = true, features = ["macros"] }
|
||||
tokio-util = { workspace = true, features = ["compat"] }
|
||||
tracing = { workspace = true }
|
||||
url = { workspace = true }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue