From d1ed41170bd66d7ffbae9fdb50f7010f3cde750e Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sun, 8 Oct 2023 01:31:19 -0400 Subject: [PATCH] Cache environment marker lookups (#55) Closes https://github.com/astral-sh/puffin/issues/53. --- Cargo.lock | 1 + crates/puffin-cli/src/commands/compile.rs | 2 +- crates/puffin-cli/src/commands/freeze.rs | 9 ++-- crates/puffin-cli/src/commands/sync.rs | 3 +- crates/puffin-cli/src/main.rs | 18 +++++++- crates/puffin-client/src/api.rs | 1 + crates/puffin-interpreter/Cargo.toml | 1 + crates/puffin-interpreter/src/lib.rs | 4 +- crates/puffin-interpreter/src/markers.rs | 50 +++++++++++++++++++++++ 9 files changed, 80 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aaf2ab288..2545552d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1736,6 +1736,7 @@ name = "puffin-interpreter" version = "0.1.0" dependencies = [ "anyhow", + "cacache", "pep440_rs", "pep508_rs", "platform-host", diff --git a/crates/puffin-cli/src/commands/compile.rs b/crates/puffin-cli/src/commands/compile.rs index 3f064d276..2bb689e10 100644 --- a/crates/puffin-cli/src/commands/compile.rs +++ b/crates/puffin-cli/src/commands/compile.rs @@ -22,7 +22,7 @@ pub(crate) async fn compile(src: &Path, cache: Option<&Path>) -> Result Result { +pub(crate) async fn freeze(cache: Option<&Path>) -> Result { // Detect the current Python interpreter. let platform = Platform::current()?; - let python = PythonExecutable::from_env(platform)?; + let python = PythonExecutable::from_env(platform, cache)?; debug!( "Using Python interpreter: {}", python.executable().display() diff --git a/crates/puffin-cli/src/commands/sync.rs b/crates/puffin-cli/src/commands/sync.rs index 0bc89e63e..b914a79ff 100644 --- a/crates/puffin-cli/src/commands/sync.rs +++ b/crates/puffin-cli/src/commands/sync.rs @@ -31,8 +31,9 @@ pub(crate) async fn sync(src: &Path, cache: Option<&Path>, flags: SyncFlags) -> let requirements = Requirements::from_str(&requirements_txt)?; // Detect the current Python interpreter. + // TODO(charlie): This is taking a _lot_ of time, like 20ms. let platform = Platform::current()?; - let python = PythonExecutable::from_env(platform)?; + let python = PythonExecutable::from_env(platform, cache)?; debug!( "Using Python interpreter: {}", python.executable().display() diff --git a/crates/puffin-cli/src/main.rs b/crates/puffin-cli/src/main.rs index 9ad4f2a37..52ccbb8e0 100644 --- a/crates/puffin-cli/src/main.rs +++ b/crates/puffin-cli/src/main.rs @@ -27,7 +27,7 @@ enum Commands { /// Clear the cache. Clean, /// Enumerate the installed packages in the current environment. - Freeze, + Freeze(FreezeArgs), } #[derive(Args)] @@ -54,6 +54,13 @@ struct SyncArgs { ignore_installed: bool, } +#[derive(Args)] +struct FreezeArgs { + /// Avoid reading from or writing to the cache. + #[arg(long)] + no_cache: bool, +} + #[tokio::main] async fn main() -> ExitCode { let cli = Cli::parse(); @@ -87,7 +94,14 @@ async fn main() -> ExitCode { .await } Commands::Clean => commands::clean(dirs.as_ref().map(ProjectDirs::cache_dir)).await, - Commands::Freeze => commands::freeze().await, + Commands::Freeze(args) => { + commands::freeze( + dirs.as_ref() + .map(ProjectDirs::cache_dir) + .filter(|_| !args.no_cache), + ) + .await + } }; match result { diff --git a/crates/puffin-client/src/api.rs b/crates/puffin-client/src/api.rs index 9319dfb8b..0f8303952 100644 --- a/crates/puffin-client/src/api.rs +++ b/crates/puffin-client/src/api.rs @@ -139,6 +139,7 @@ pub struct SimpleJson { pub versions: Vec, } +// TODO(charlie): Can we rename this? What does this look like for source distributions? #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] pub struct File { diff --git a/crates/puffin-interpreter/Cargo.toml b/crates/puffin-interpreter/Cargo.toml index 1e563e124..6acfab4a9 100644 --- a/crates/puffin-interpreter/Cargo.toml +++ b/crates/puffin-interpreter/Cargo.toml @@ -16,6 +16,7 @@ platform-host = { path = "../platform-host" } puffin-package = { path = "../puffin-package" } anyhow = { workspace = true } +cacache = { workspace = true } serde_json = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/crates/puffin-interpreter/src/lib.rs b/crates/puffin-interpreter/src/lib.rs index 2f85b6b7f..e1f9eb41f 100644 --- a/crates/puffin-interpreter/src/lib.rs +++ b/crates/puffin-interpreter/src/lib.rs @@ -25,11 +25,11 @@ pub struct PythonExecutable { impl PythonExecutable { /// Detect the current Python executable from the host environment. - pub fn from_env(platform: Platform) -> Result { + pub fn from_env(platform: Platform, cache: Option<&Path>) -> Result { let platform = PythonPlatform::from(platform); let venv = virtual_env::detect_virtual_env(&platform)?; let executable = platform.venv_python(&venv); - let markers = markers::detect_markers(&executable)?; + let markers = markers::detect_cached_markers(&executable, cache)?; Ok(Self { platform, diff --git a/crates/puffin-interpreter/src/markers.rs b/crates/puffin-interpreter/src/markers.rs index 03c660788..ac36e1a9a 100644 --- a/crates/puffin-interpreter/src/markers.rs +++ b/crates/puffin-interpreter/src/markers.rs @@ -3,6 +3,7 @@ use std::path::Path; use std::process::{Command, Output}; use anyhow::{Context, Result}; +use tracing::debug; use pep508_rs::MarkerEnvironment; @@ -12,6 +13,55 @@ pub(crate) fn detect_markers(python: impl AsRef) -> Result(&output.stdout)?) } +/// A wrapper around [`markers::detect_markers`] to cache the computed markers. +/// +/// Running a Python script is (relatively) expensive, and the markers won't change +/// unless the Python executable changes, so we use the executable's last modified +/// time as a cache key. +pub(crate) fn detect_cached_markers( + executable: &Path, + cache: Option<&Path>, +) -> Result { + // Read from the cache. + let key = if let Some(cache) = cache { + if let Ok(key) = cache_key(executable) { + if let Ok(data) = cacache::read_sync(cache, &key) { + debug!("Using cached markers for {}", executable.display()); + return Ok(serde_json::from_slice::(&data)?); + } + Some(key) + } else { + None + } + } else { + None + }; + + // Otherwise, run the Python script. + debug!("Detecting markers for {}", executable.display()); + let markers = detect_markers(executable)?; + + // Write to the cache. + if let Some(cache) = cache { + if let Some(key) = key { + cacache::write_sync(cache, key, serde_json::to_vec(&markers)?)?; + } + } + + Ok(markers) +} + +/// Create a cache key for the Python executable, consisting of the executable's +/// last modified time and the executable's path. +fn cache_key(executable: &Path) -> Result { + let modified = executable + .metadata()? + .modified()? + .duration_since(std::time::UNIX_EPOCH)? + .as_millis(); + Ok(format!("puffin:v0:{}:{}", executable.display(), modified)) +} + const CAPTURE_MARKERS_SCRIPT: &str = " import os import sys