mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-03 18:38:21 +00:00
Migrate interpreter query to custom caching (#508)
This removes the last usage of cacache by replacing it with a custom,
flat JSON cache keyed by the digest of the executable path.

A step towards #478. I've made `CachedByTimestamp<T>` generic over `T`
but intentionally not moved it to `puffin-cache` yet.
This commit is contained in:
parent
5435d44756
commit
d89fbeb642
13 changed files with 88 additions and 156 deletions
|
@ -18,6 +18,7 @@ workspace = true
|
|||
|
||||
[dependencies]
|
||||
platform-host = { path = "../platform-host" }
|
||||
puffin-cache = { path = "../puffin-cache" }
|
||||
puffin-interpreter = { path = "../puffin-interpreter" }
|
||||
|
||||
camino = { workspace = true }
|
||||
|
|
|
@ -11,6 +11,7 @@ use tracing_subscriber::{fmt, EnvFilter};
|
|||
|
||||
use gourgeist::{create_bare_venv, parse_python_cli};
|
||||
use platform_host::Platform;
|
||||
use puffin_cache::Cache;
|
||||
use puffin_interpreter::Interpreter;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
|
@ -25,8 +26,8 @@ fn run() -> Result<(), gourgeist::Error> {
|
|||
let location = cli.path.unwrap_or(Utf8PathBuf::from(".venv"));
|
||||
let python = parse_python_cli(cli.python)?;
|
||||
let platform = Platform::current()?;
|
||||
let cache = tempfile::tempdir()?;
|
||||
let info = Interpreter::query(python.as_std_path(), platform, cache.path()).unwrap();
|
||||
let cache = Cache::temp()?;
|
||||
let info = Interpreter::query(python.as_std_path(), platform, &cache).unwrap();
|
||||
create_bare_venv(&location, &info)?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -150,6 +150,43 @@ pub enum CacheBucket {
|
|||
/// Git repositories.
|
||||
Git,
|
||||
/// Information about an interpreter at a path.
|
||||
///
|
||||
/// To avoid caching pyenv shims, bash scripts which may redirect to a new python version
|
||||
/// without the shim itself changing, we only cache when the path equals `sys.executable`, i.e.
|
||||
/// the path we're running is the python executable itself and not a shim.
|
||||
///
|
||||
/// Cache structure: `interpreter-v0/<digest(path)>.json`
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// Each json file contains a timestamp field in unix time (milliseconds), the [PEP 508]
|
||||
/// markers and some information from the `sys`/`sysconfig` modules.
|
||||
///
|
||||
/// ```json
|
||||
/// {
|
||||
/// "timestamp": 1698047994491,
|
||||
/// "data": {
|
||||
/// "markers": {
|
||||
/// "implementation_name": "cpython",
|
||||
/// "implementation_version": "3.12.0",
|
||||
/// "os_name": "posix",
|
||||
/// "platform_machine": "x86_64",
|
||||
/// "platform_python_implementation": "CPython",
|
||||
/// "platform_release": "6.5.0-13-generic",
|
||||
/// "platform_system": "Linux",
|
||||
/// "platform_version": "#13-Ubuntu SMP PREEMPT_DYNAMIC Fri Nov 3 12:16:05 UTC 2023",
|
||||
/// "python_full_version": "3.12.0",
|
||||
/// "python_version": "3.12",
|
||||
/// "sys_platform": "linux"
|
||||
/// },
|
||||
/// "base_exec_prefix": "/home/ferris/.pyenv/versions/3.12.0",
|
||||
/// "base_prefix": "/home/ferris/.pyenv/versions/3.12.0",
|
||||
/// "sys_executable": "/home/ferris/projects/puffin/.venv/bin/python"
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// [PEP 508]: https://peps.python.org/pep-0508/#environment-markers
|
||||
Interpreter,
|
||||
/// Index responses through the simple metadata API.
|
||||
Simple,
|
||||
|
|
|
@ -39,7 +39,6 @@ requirements-txt = { path = "../requirements-txt" }
|
|||
anstream = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
bitflags = { workspace = true }
|
||||
cacache = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
clap = { workspace = true, features = ["derive"] }
|
||||
colored = { workspace = true }
|
||||
|
|
|
@ -8,7 +8,7 @@ use miette::{Diagnostic, IntoDiagnostic};
|
|||
use thiserror::Error;
|
||||
|
||||
use platform_host::Platform;
|
||||
use puffin_cache::{Cache, CacheBucket};
|
||||
use puffin_cache::Cache;
|
||||
use puffin_interpreter::Interpreter;
|
||||
|
||||
use crate::commands::ExitStatus;
|
||||
|
@ -77,12 +77,8 @@ fn venv_impl(
|
|||
};
|
||||
|
||||
let platform = Platform::current().into_diagnostic()?;
|
||||
let interpreter_info = Interpreter::query(
|
||||
&base_python,
|
||||
platform,
|
||||
&cache.bucket(CacheBucket::Interpreter),
|
||||
)
|
||||
.map_err(VenvError::InterpreterError)?;
|
||||
let interpreter_info =
|
||||
Interpreter::query(&base_python, platform, cache).map_err(VenvError::InterpreterError)?;
|
||||
|
||||
writeln!(
|
||||
printer,
|
||||
|
|
|
@ -32,3 +32,4 @@ url = { workspace = true }
|
|||
|
||||
[dev-dependencies]
|
||||
anyhow = { workspace = true }
|
||||
tokio = { workspace = true, features = ["fs", "macros"] }
|
||||
|
|
|
@ -18,7 +18,6 @@ pep508_rs = { path = "../pep508-rs", features = ["serde"] }
|
|||
platform-host = { path = "../platform-host" }
|
||||
puffin-cache = { path = "../puffin-cache" }
|
||||
|
||||
cacache = { workspace = true }
|
||||
fs-err = { workspace = true, features = ["tokio"] }
|
||||
serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
|
|
|
@ -2,12 +2,14 @@ use std::path::{Path, PathBuf};
|
|||
use std::process::Command;
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
use fs_err as fs;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::debug;
|
||||
|
||||
use pep440_rs::Version;
|
||||
use pep508_rs::MarkerEnvironment;
|
||||
use platform_host::Platform;
|
||||
use puffin_cache::{digest, Cache, CacheBucket};
|
||||
|
||||
use crate::python_platform::PythonPlatform;
|
||||
use crate::Error;
|
||||
|
@ -24,7 +26,7 @@ pub struct Interpreter {
|
|||
|
||||
impl Interpreter {
|
||||
/// Detect the interpreter info for the given Python executable.
|
||||
pub fn query(executable: &Path, platform: Platform, cache: &Path) -> Result<Self, Error> {
|
||||
pub fn query(executable: &Path, platform: Platform, cache: &Cache) -> Result<Self, Error> {
|
||||
let info = InterpreterQueryResult::query_cached(executable, cache)?;
|
||||
debug_assert!(
|
||||
info.base_prefix == info.base_exec_prefix,
|
||||
|
@ -101,7 +103,7 @@ impl Interpreter {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize)]
|
||||
#[derive(Deserialize, Serialize, Clone)]
|
||||
pub(crate) struct InterpreterQueryResult {
|
||||
pub(crate) markers: MarkerEnvironment,
|
||||
pub(crate) base_exec_prefix: PathBuf,
|
||||
|
@ -109,6 +111,12 @@ pub(crate) struct InterpreterQueryResult {
|
|||
pub(crate) sys_executable: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize)]
|
||||
pub(crate) struct CachedByTimestamp<T> {
|
||||
pub(crate) timestamp: u128,
|
||||
pub(crate) data: T,
|
||||
}
|
||||
|
||||
impl InterpreterQueryResult {
|
||||
/// Return the resolved [`InterpreterQueryResult`] for the given Python executable.
|
||||
pub(crate) fn query(interpreter: &Path) -> Result<Self, Error> {
|
||||
|
@ -153,48 +161,43 @@ impl InterpreterQueryResult {
|
|||
/// Running a Python script is (relatively) expensive, and the markers won't change
|
||||
/// unless the Python executable changes, so we use the executable's last modified
|
||||
/// time as a cache key.
|
||||
pub(crate) fn query_cached(executable: &Path, cache: &Path) -> Result<Self, Error> {
|
||||
pub(crate) fn query_cached(executable: &Path, cache: &Cache) -> Result<Self, Error> {
|
||||
let executable_bytes = executable.as_os_str().as_encoded_bytes();
|
||||
let cache_dir = cache.bucket(CacheBucket::Interpreter);
|
||||
let cache_path = cache_dir.join(format!("{}.json", digest(&executable_bytes)));
|
||||
|
||||
// Read from the cache.
|
||||
let key = if let Ok(key) = cache_key(executable) {
|
||||
if let Ok(data) = cacache::read_sync(cache, &key) {
|
||||
if let Ok(info) = serde_json::from_slice::<Self>(&data) {
|
||||
debug!("Using cached markers for {}", executable.display());
|
||||
return Ok(info);
|
||||
}
|
||||
if let Ok(data) = fs::read(&cache_path) {
|
||||
if let Ok(cached) = serde_json::from_slice::<CachedByTimestamp<Self>>(&data) {
|
||||
debug!("Using cached markers for {}", executable.display());
|
||||
return Ok(cached.data);
|
||||
}
|
||||
Some(key)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
}
|
||||
|
||||
// Otherwise, run the Python script.
|
||||
debug!("Detecting markers for {}", executable.display());
|
||||
let info = Self::query(executable)?;
|
||||
|
||||
// If the executable is actually a pyenv shim, a bash script that redirects to the activated
|
||||
// python, we're not allowed to cache the interpreter info
|
||||
let modified = fs_err::metadata(executable)?
|
||||
// Note: This is infallible on windows and unix (i.e. all platforms we support)
|
||||
.modified()?
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map_err(|err| Error::SystemTime(executable.to_path_buf(), err))?;
|
||||
|
||||
// If `executable` is a pyenv shim, a bash script that redirects to the activated
|
||||
// python executable at another path, we're not allowed to cache the interpreter info
|
||||
if executable == info.sys_executable {
|
||||
fs::create_dir_all(cache_dir)?;
|
||||
// Write to the cache.
|
||||
if let Some(key) = key {
|
||||
cacache::write_sync(cache, key, serde_json::to_vec(&info)?)?;
|
||||
}
|
||||
fs::write(
|
||||
cache_path,
|
||||
serde_json::to_vec(&CachedByTimestamp {
|
||||
timestamp: modified.as_millis(),
|
||||
data: info.clone(),
|
||||
})?,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(info)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a cache key for the Python executable, consisting of the executable's
|
||||
/// last modified time and the executable's path.
|
||||
fn cache_key(executable: &Path) -> Result<String, Error> {
|
||||
let modified = fs_err::metadata(executable)?
|
||||
// Note: This is infallible on windows and unix (i.e. all platforms we support)
|
||||
.modified()?
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map_err(|err| Error::SystemTime(executable.to_path_buf(), err))?;
|
||||
Ok(format!(
|
||||
"puffin:v0:{}:{}",
|
||||
executable.display(),
|
||||
modified.as_millis()
|
||||
))
|
||||
}
|
||||
|
|
|
@ -38,7 +38,5 @@ pub enum Error {
|
|||
stderr: String,
|
||||
},
|
||||
#[error("Failed to write to cache")]
|
||||
Cacache(#[from] cacache::Error),
|
||||
#[error("Failed to write to cache")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
|
|||
use tracing::debug;
|
||||
|
||||
use platform_host::Platform;
|
||||
use puffin_cache::{Cache, CacheBucket};
|
||||
use puffin_cache::Cache;
|
||||
|
||||
use crate::python_platform::PythonPlatform;
|
||||
use crate::{Error, Interpreter};
|
||||
|
@ -24,11 +24,7 @@ impl Virtualenv {
|
|||
return Err(Error::NotFound);
|
||||
};
|
||||
let executable = platform.venv_python(&venv);
|
||||
let interpreter = Interpreter::query(
|
||||
&executable,
|
||||
platform.0,
|
||||
&cache.bucket(CacheBucket::Interpreter),
|
||||
)?;
|
||||
let interpreter = Interpreter::query(&executable, platform.0, cache)?;
|
||||
|
||||
Ok(Self {
|
||||
root: venv,
|
||||
|
@ -39,11 +35,7 @@ impl Virtualenv {
|
|||
pub fn from_virtualenv(platform: Platform, root: &Path, cache: &Cache) -> Result<Self, Error> {
|
||||
let platform = PythonPlatform::from(platform);
|
||||
let executable = platform.venv_python(root);
|
||||
let interpreter = Interpreter::query(
|
||||
&executable,
|
||||
platform.0,
|
||||
&cache.bucket(CacheBucket::Interpreter),
|
||||
)?;
|
||||
let interpreter = Interpreter::query(&executable, platform.0, cache)?;
|
||||
|
||||
Ok(Self {
|
||||
root: root.to_path_buf(),
|
||||
|
|
|
@ -49,7 +49,7 @@ serde_json = { workspace = true }
|
|||
sha2 = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tokio = { workspace = true, features = ["macros"] }
|
||||
tokio-util = { workspace = true, features = ["compat"] }
|
||||
tracing = { workspace = true }
|
||||
url = { workspace = true }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue