Cache environment marker lookups (#55)

Closes https://github.com/astral-sh/puffin/issues/53.
This commit is contained in:
Charlie Marsh 2023-10-08 01:31:19 -04:00 committed by GitHub
parent 5eef6e9636
commit d1ed41170b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 80 additions and 9 deletions

1
Cargo.lock generated
View file

@ -1736,6 +1736,7 @@ name = "puffin-interpreter"
version = "0.1.0"
dependencies = [
"anyhow",
"cacache",
"pep440_rs",
"pep508_rs",
"platform-host",

View file

@ -22,7 +22,7 @@ pub(crate) async fn compile(src: &Path, cache: Option<&Path>) -> Result<ExitStat
// Detect the current Python interpreter.
let platform = Platform::current()?;
let python = PythonExecutable::from_env(platform)?;
let python = PythonExecutable::from_env(platform, cache)?;
debug!(
"Using Python interpreter: {}",
python.executable().display()

View file

@ -1,15 +1,18 @@
use std::path::Path;
use anyhow::Result;
use tracing::debug;
use platform_host::Platform;
use puffin_interpreter::{PythonExecutable, SitePackages};
use tracing::debug;
use crate::commands::ExitStatus;
/// Enumerate the installed packages in the current environment.
pub(crate) async fn freeze() -> Result<ExitStatus> {
pub(crate) async fn freeze(cache: Option<&Path>) -> Result<ExitStatus> {
// Detect the current Python interpreter.
let platform = Platform::current()?;
let python = PythonExecutable::from_env(platform)?;
let python = PythonExecutable::from_env(platform, cache)?;
debug!(
"Using Python interpreter: {}",
python.executable().display()

View file

@ -31,8 +31,9 @@ pub(crate) async fn sync(src: &Path, cache: Option<&Path>, flags: SyncFlags) ->
let requirements = Requirements::from_str(&requirements_txt)?;
// Detect the current Python interpreter.
// TODO(charlie): This is taking a _lot_ of time, like 20ms.
let platform = Platform::current()?;
let python = PythonExecutable::from_env(platform)?;
let python = PythonExecutable::from_env(platform, cache)?;
debug!(
"Using Python interpreter: {}",
python.executable().display()

View file

@ -27,7 +27,7 @@ enum Commands {
/// Clear the cache.
Clean,
/// Enumerate the installed packages in the current environment.
Freeze,
Freeze(FreezeArgs),
}
#[derive(Args)]
@ -54,6 +54,13 @@ struct SyncArgs {
ignore_installed: bool,
}
// Command-line arguments for the `freeze` subcommand (`Commands::Freeze`).
//
// NOTE: plain `//` comments are used here on purpose; the `///` comment on the
// field below is picked up by clap's derive as the flag's help text.
#[derive(Args)]
struct FreezeArgs {
/// Avoid reading from or writing to the cache.
#[arg(long)]
// When set, `main` passes `None` as the cache directory to `commands::freeze`.
no_cache: bool,
}
#[tokio::main]
async fn main() -> ExitCode {
let cli = Cli::parse();
@ -87,7 +94,14 @@ async fn main() -> ExitCode {
.await
}
Commands::Clean => commands::clean(dirs.as_ref().map(ProjectDirs::cache_dir)).await,
Commands::Freeze => commands::freeze().await,
Commands::Freeze(args) => {
commands::freeze(
dirs.as_ref()
.map(ProjectDirs::cache_dir)
.filter(|_| !args.no_cache),
)
.await
}
};
match result {

View file

@ -139,6 +139,7 @@ pub struct SimpleJson {
pub versions: Vec<String>,
}
// TODO(charlie): Can we rename this? What does this look like for source distributions?
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct File {

View file

@ -16,6 +16,7 @@ platform-host = { path = "../platform-host" }
puffin-package = { path = "../puffin-package" }
anyhow = { workspace = true }
cacache = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }

View file

@ -25,11 +25,11 @@ pub struct PythonExecutable {
impl PythonExecutable {
/// Detect the current Python executable from the host environment.
pub fn from_env(platform: Platform) -> Result<Self> {
pub fn from_env(platform: Platform, cache: Option<&Path>) -> Result<Self> {
let platform = PythonPlatform::from(platform);
let venv = virtual_env::detect_virtual_env(&platform)?;
let executable = platform.venv_python(&venv);
let markers = markers::detect_markers(&executable)?;
let markers = markers::detect_cached_markers(&executable, cache)?;
Ok(Self {
platform,

View file

@ -3,6 +3,7 @@ use std::path::Path;
use std::process::{Command, Output};
use anyhow::{Context, Result};
use tracing::debug;
use pep508_rs::MarkerEnvironment;
@ -12,6 +13,55 @@ pub(crate) fn detect_markers(python: impl AsRef<Path>) -> Result<MarkerEnvironme
Ok(serde_json::from_slice::<MarkerEnvironment>(&output.stdout)?)
}
/// A wrapper around [`detect_markers`] that caches the computed markers.
///
/// Running a Python script is (relatively) expensive, and the markers won't change
/// unless the Python executable changes, so the result is stored under a key built
/// from the executable's path and last modified time (see [`cache_key`]).
///
/// The cache is treated as best-effort on the read side: if `cache` is `None`, the
/// key can't be computed, the entry is missing, or the entry can't be deserialized,
/// the markers are detected by running the interpreter. An entry that fails to
/// deserialize is overwritten with the freshly detected markers.
///
/// # Errors
///
/// Returns an error if [`detect_markers`] fails, if the markers can't be
/// serialized, or if writing the new entry to the cache fails.
pub(crate) fn detect_cached_markers(
    executable: &Path,
    cache: Option<&Path>,
) -> Result<MarkerEnvironment> {
    // Only compute a key when caching is enabled. If the key can't be computed
    // (e.g., the executable's metadata is unreadable), skip the cache entirely.
    let key = cache.and_then(|_| cache_key(executable).ok());

    // Read from the cache.
    if let (Some(cache), Some(key)) = (cache, key.as_deref()) {
        if let Ok(data) = cacache::read_sync(cache, key) {
            match serde_json::from_slice::<MarkerEnvironment>(&data) {
                Ok(markers) => {
                    debug!("Using cached markers for {}", executable.display());
                    return Ok(markers);
                }
                // A corrupt or outdated entry must not fail the command: the key
                // would stay the same, so every subsequent run would fail too.
                // Fall through, re-detect, and overwrite the entry below.
                Err(err) => {
                    debug!(
                        "Ignoring invalid cached markers for {}: {}",
                        executable.display(),
                        err
                    );
                }
            }
        }
    }

    // Otherwise, run the Python script.
    debug!("Detecting markers for {}", executable.display());
    let markers = detect_markers(executable)?;

    // Write to the cache.
    if let (Some(cache), Some(key)) = (cache, key) {
        cacache::write_sync(cache, key, serde_json::to_vec(&markers)?)?;
    }

    Ok(markers)
}
/// Build the cache key under which the markers for `executable` are stored.
///
/// The key combines the executable's path with its last modified time (in
/// milliseconds since the Unix epoch), so touching or replacing the interpreter
/// yields a different key.
///
/// # Errors
///
/// Returns an error if the file's metadata or modification time can't be read,
/// or if the modification time is earlier than the Unix epoch.
fn cache_key(executable: &Path) -> Result<String> {
    let metadata = executable.metadata()?;
    let modified_at = metadata.modified()?;
    let since_epoch = modified_at.duration_since(std::time::UNIX_EPOCH)?;
    let millis = since_epoch.as_millis();
    Ok(format!("puffin:v0:{}:{}", executable.display(), millis))
}
const CAPTURE_MARKERS_SCRIPT: &str = "
import os
import sys