Unify python interpreter abstractions (#178)

Previously, we had two python interpreter metadata structs, one in
gourgeist and one in puffin. Both would spawn a subprocess to query
overlapping metadata and both would appear in the cli crate; if you
weren't careful, you could even have two different base interpreters at
once. This change unifies them into one set of metadata, queried and
cached once.

Another effect of this change is proper separation of python interpreter
and venv. A base interpreter (such as `/usr/bin/python`, but also a
pyenv- or conda-installed python) has a set of metadata. A venv has a root and
inherits the base python metadata except for `sys.prefix`, which, unlike
`sys.base_prefix`, gets set to the venv root. From the root and the
interpreter info we can compute the paths inside the venv. We can reuse
the interpreter info of the base interpreter when creating a venv
without having to query the newly created `python`.
This commit is contained in:
konsti 2023-10-25 22:11:36 +02:00 committed by GitHub
parent 1fbe328257
commit 889f6173cc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
37 changed files with 515 additions and 584 deletions

View file

@ -11,12 +11,14 @@ license = { workspace = true }
[dependencies]
pep440_rs = { path = "../pep440-rs" }
pep508_rs = { path = "../pep508-rs" }
pep508_rs = { path = "../pep508-rs", features = ["serde"] }
platform-host = { path = "../platform-host" }
anyhow = { workspace = true }
cacache = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
serde_json = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
serde = { workspace = true, features = ["derive"] }

View file

@ -0,0 +1,39 @@
# Interpreter query script: collects marker values and the base prefixes of the running
# interpreter and prints them to stdout as a single JSON object.
import json
import os
import platform
import sys
# Render a version-info object as "major.minor.micro"; for non-final releases the first
# letter of the release level and the serial number are appended.
def format_full_version(info):
version = "{0.major}.{0.minor}.{0.micro}".format(info)
kind = info.releaselevel
if kind != "final":
version += kind[0] + str(info.serial)
return version
# Use placeholder values when the interpreter does not expose `sys.implementation`.
if hasattr(sys, "implementation"):
implementation_version = format_full_version(sys.implementation.version)
implementation_name = sys.implementation.name
else:
implementation_version = "0"
implementation_name = ""
# Marker name -> value for the running interpreter.
markers = {
"implementation_name": implementation_name,
"implementation_version": implementation_version,
"os_name": os.name,
"platform_machine": platform.machine(),
"platform_python_implementation": platform.python_implementation(),
"platform_release": platform.release(),
"platform_system": platform.system(),
"platform_version": platform.version(),
"python_full_version": platform.python_version(),
"python_version": ".".join(platform.python_version_tuple()[:2]),
"sys_platform": sys.platform,
}
# Full payload: the markers plus the base installation prefixes.
interpreter_info = {
"markers": markers,
"base_prefix": sys.base_prefix,
"base_exec_prefix": sys.base_exec_prefix,
}
# The JSON document is the only thing written to stdout.
print(json.dumps(interpreter_info))

View file

@ -0,0 +1,189 @@
use std::io;
use std::path::{Path, PathBuf};
use std::process::Command;
use pep440_rs::Version;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tracing::debug;
use crate::python_platform::PythonPlatform;
use pep508_rs::MarkerEnvironment;
use platform_host::Platform;
/// Metadata describing a Python interpreter: the host platform plus the values obtained by
/// querying the interpreter.
///
/// Instances are built with [`InterpreterInfo::query_cached`].
#[derive(Debug, Clone)]
pub struct InterpreterInfo {
/// Python-aware wrapper around the host [`Platform`].
pub(crate) platform: PythonPlatform,
/// Marker values reported by the interpreter.
pub(crate) markers: MarkerEnvironment,
/// The `base_exec_prefix` value reported by the interpreter query.
pub(crate) base_exec_prefix: PathBuf,
/// The `base_prefix` value reported by the interpreter query.
pub(crate) base_prefix: PathBuf,
}
impl InterpreterInfo {
pub fn query_cached(
executable: &Path,
platform: Platform,
cache: Option<&Path>,
) -> anyhow::Result<Self> {
let info = InterpreterQueryResult::query_cached(executable, cache)?;
debug_assert!(
info.base_prefix == info.base_exec_prefix,
"Not a venv python: {}, prefix: {}",
executable.display(),
info.base_prefix.display()
);
Ok(Self {
platform: PythonPlatform(platform),
markers: info.markers,
base_exec_prefix: info.base_exec_prefix,
base_prefix: info.base_prefix,
})
}
}
impl InterpreterInfo {
    /// Returns the host [`Platform`] this interpreter info was created for.
    pub fn platform(&self) -> &Platform {
        &self.platform
    }

    /// Returns the [`MarkerEnvironment`] reported by the interpreter.
    pub fn markers(&self) -> &MarkerEnvironment {
        &self.markers
    }

    /// Returns the Python version taken from the marker environment.
    pub fn version(&self) -> &Version {
        &self.markers.python_version.version
    }

    /// Returns the Python version as a `(major, minor)` tuple.
    ///
    /// # Panics
    ///
    /// Panics if the version has fewer than two release segments, or if either of the
    /// first two segments does not fit into a `u8`.
    pub fn simple_version(&self) -> (u8, u8) {
        let release = &self.version().release;
        let major = u8::try_from(release[0]).expect("invalid major version");
        let minor = u8::try_from(release[1]).expect("invalid minor version");
        (major, minor)
    }

    /// Returns the `base_exec_prefix` reported by the interpreter.
    pub fn base_exec_prefix(&self) -> &Path {
        self.base_exec_prefix.as_path()
    }

    /// Returns the `base_prefix` reported by the interpreter.
    pub fn base_prefix(&self) -> &Path {
        self.base_prefix.as_path()
    }
}
/// Errors that can occur while querying a Python interpreter for its metadata.
#[derive(Debug, Error)]
pub(crate) enum InterpreterQueryError {
/// An I/O error, forwarded as-is.
#[error(transparent)]
IO(#[from] io::Error),
/// The interpreter could not be spawned, exited unsuccessfully, wrote to stderr, or
/// printed output that could not be parsed; `err` carries the details.
#[error("Failed to query python interpreter at {interpreter}")]
PythonSubcommand {
/// Path of the interpreter that was queried.
interpreter: PathBuf,
/// Underlying cause of the failure.
#[source]
err: io::Error,
},
}
/// The data printed as JSON by the interpreter query script; it is also the value that is
/// serialized into and read back from the cache.
#[derive(Deserialize, Serialize)]
pub(crate) struct InterpreterQueryResult {
/// Marker values reported by the interpreter.
pub(crate) markers: MarkerEnvironment,
/// The `base_exec_prefix` entry of the script output.
pub(crate) base_exec_prefix: PathBuf,
/// The `base_prefix` entry of the script output.
pub(crate) base_prefix: PathBuf,
}
impl InterpreterQueryResult {
    /// Run the embedded `get_interpreter_info.py` script with `interpreter` and parse the
    /// JSON it prints into an [`InterpreterQueryResult`].
    ///
    /// # Errors
    ///
    /// Returns [`InterpreterQueryError::PythonSubcommand`] if the interpreter cannot be
    /// spawned, exits unsuccessfully, writes anything to stderr, or prints output that is
    /// not the expected JSON.
    pub(crate) fn query(interpreter: &Path) -> Result<Self, InterpreterQueryError> {
        let output = Command::new(interpreter)
            .args(["-c", include_str!("get_interpreter_info.py")])
            .output()
            .map_err(|err| InterpreterQueryError::PythonSubcommand {
                interpreter: interpreter.to_path_buf(),
                err,
            })?;

        // stderr isn't technically a criterion for success, but i don't know of any cases where there
        // should be stderr output and if there is, we want to know
        if !output.status.success() || !output.stderr.is_empty() {
            return Err(InterpreterQueryError::PythonSubcommand {
                interpreter: interpreter.to_path_buf(),
                err: io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "Querying python at {} failed with status {}:\n--- stdout:\n{}\n--- stderr:\n{}",
                        interpreter.display(),
                        output.status,
                        String::from_utf8_lossy(&output.stdout).trim(),
                        String::from_utf8_lossy(&output.stderr).trim()
                    ),
                ),
            });
        }

        let data = serde_json::from_slice::<Self>(&output.stdout).map_err(|err| {
            InterpreterQueryError::PythonSubcommand {
                interpreter: interpreter.to_path_buf(),
                err: io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "Querying python at {} did not return the expected data ({}):\n--- stdout:\n{}\n--- stderr:\n{}",
                        interpreter.display(),
                        err,
                        String::from_utf8_lossy(&output.stdout).trim(),
                        String::from_utf8_lossy(&output.stderr).trim()
                    ),
                ),
            }
        })?;

        Ok(data)
    }

    /// A wrapper around [`InterpreterQueryResult::query`] that caches the query result.
    ///
    /// Running a Python script is (relatively) expensive, and the result won't change
    /// unless the Python executable changes, so we use the executable's last modified
    /// time as a cache key.
    ///
    /// A cache entry that cannot be read or deserialized (for example one written in an
    /// older format under the same key) is treated as a cache miss: the interpreter is
    /// queried again and the entry is overwritten.
    ///
    /// # Errors
    ///
    /// Returns an error if querying the interpreter fails, or if the fresh result cannot
    /// be serialized or written to the cache.
    pub(crate) fn query_cached(executable: &Path, cache: Option<&Path>) -> anyhow::Result<Self> {
        // Read from the cache.
        let key = if let Some(cache) = cache {
            if let Ok(key) = cache_key(executable) {
                if let Ok(data) = cacache::read_sync(cache, &key) {
                    match serde_json::from_slice::<Self>(&data) {
                        Ok(info) => {
                            debug!("Using cached markers for {}", executable.display());
                            return Ok(info);
                        }
                        // Stale or corrupt entry: fall through and re-query instead of failing.
                        Err(err) => {
                            debug!(
                                "Ignoring unreadable cache entry for {}: {}",
                                executable.display(),
                                err
                            );
                        }
                    }
                }
                Some(key)
            } else {
                None
            }
        } else {
            None
        };

        // Otherwise, run the Python script.
        debug!("Detecting markers for {}", executable.display());
        let info = Self::query(executable)?;

        // Write to the cache.
        if let (Some(cache), Some(key)) = (cache, key) {
            cacache::write_sync(cache, key, serde_json::to_vec(&info)?)?;
        }

        Ok(info)
    }
}
/// Build the cache key for a Python executable from its path and its last-modified time
/// (in milliseconds since the Unix epoch).
///
/// # Errors
///
/// Fails if the file metadata or modification time cannot be read, or if the modification
/// time lies before the Unix epoch.
fn cache_key(executable: &Path) -> anyhow::Result<String> {
    let metadata = executable.metadata()?;
    let modified_at = metadata.modified()?;
    let since_epoch = modified_at.duration_since(std::time::UNIX_EPOCH)?;
    let millis = since_epoch.as_millis();
    Ok(format!("puffin:v0:{}:{}", executable.display(), millis))
}

View file

@ -1,103 +1,6 @@
use std::path::{Path, PathBuf};
pub use crate::interpreter_info::InterpreterInfo;
pub use crate::virtual_env::Virtualenv;
use anyhow::Result;
use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use platform_host::Platform;
use crate::python_platform::PythonPlatform;
mod markers;
mod interpreter_info;
mod python_platform;
mod virtual_env;
/// A Python executable inside a virtual environment, together with its platform markers.
#[derive(Debug, Clone)]
pub struct PythonExecutable {
/// Python-aware wrapper around the host platform.
platform: PythonPlatform,
/// Root directory of the virtual environment.
venv: PathBuf,
/// Path to the `python` binary inside `venv`.
executable: PathBuf,
/// Marker values detected for `executable`.
markers: MarkerEnvironment,
}
impl PythonExecutable {
    /// Detect the Python executable of the virtual environment found in the host
    /// environment, loading its markers through the optional `cache`.
    pub fn from_env(platform: Platform, cache: Option<&Path>) -> Result<Self> {
        let python_platform = PythonPlatform::from(platform);
        let venv_root = virtual_env::detect_virtual_env(&python_platform)?;
        let python = python_platform.venv_python(&venv_root);
        let marker_env = markers::detect_cached_markers(&python, cache)?;
        Ok(Self {
            platform: python_platform,
            venv: venv_root,
            executable: python,
            markers: marker_env,
        })
    }

    /// Build a [`PythonExecutable`] for the virtual environment rooted at `venv`, loading
    /// its markers through the optional `cache`.
    pub fn from_venv(platform: Platform, venv: &Path, cache: Option<&Path>) -> Result<Self> {
        let python_platform = PythonPlatform::from(platform);
        let python = python_platform.venv_python(venv);
        let marker_env = markers::detect_cached_markers(&python, cache)?;
        Ok(Self {
            platform: python_platform,
            venv: venv.to_path_buf(),
            executable: python,
            markers: marker_env,
        })
    }

    /// Create a [`PythonExecutable`] for a venv with a known base [`PythonExecutable`].
    ///
    /// Platform and markers are copied from `self`; only the venv root and the executable
    /// path are recomputed, so no interpreter is run.
    #[must_use]
    pub fn with_venv(&self, venv: &Path) -> Self {
        Self {
            platform: self.platform.clone(),
            venv: venv.to_path_buf(),
            executable: self.platform.venv_python(venv),
            markers: self.markers.clone(),
        }
    }

    /// Returns the host [`Platform`] of this Python executable.
    pub fn platform(&self) -> &Platform {
        &self.platform
    }

    /// Returns the path to the `site-packages` directory inside the virtual environment.
    pub fn site_packages(&self) -> PathBuf {
        let venv_root = self.venv();
        let python_version = self.simple_version();
        self.platform.venv_site_packages(venv_root, python_version)
    }

    /// Returns the root of the Python virtual environment.
    pub fn venv(&self) -> &Path {
        &self.venv
    }

    /// Returns the path to the Python executable.
    pub fn executable(&self) -> &Path {
        &self.executable
    }

    /// Returns the [`MarkerEnvironment`] for this Python executable.
    pub fn markers(&self) -> &MarkerEnvironment {
        &self.markers
    }

    /// Returns the Python version taken from the marker environment.
    pub fn version(&self) -> &Version {
        &self.markers.python_version.version
    }

    /// Returns the Python version as a `(major, minor)` tuple.
    ///
    /// # Panics
    ///
    /// Panics if the version has fewer than two release segments, or if either of the
    /// first two segments does not fit into a `u8`.
    pub fn simple_version(&self) -> (u8, u8) {
        let release = &self.version().release;
        let major = u8::try_from(release[0]).expect("invalid major version");
        let minor = u8::try_from(release[1]).expect("invalid minor version");
        (major, minor)
    }
}

View file

@ -1,110 +0,0 @@
use std::ffi::OsStr;
use std::path::Path;
use std::process::{Command, Output};
use anyhow::{Context, Result};
use tracing::debug;
use pep508_rs::MarkerEnvironment;
/// Return the resolved [`MarkerEnvironment`] for the given Python executable.
pub(crate) fn detect_markers(python: impl AsRef<Path>) -> Result<MarkerEnvironment> {
let output = call_python(python.as_ref(), ["-c", CAPTURE_MARKERS_SCRIPT])?;
Ok(serde_json::from_slice::<MarkerEnvironment>(&output.stdout)?)
}
/// A wrapper around [`markers::detect_markers`] to cache the computed markers.
///
/// Running a Python script is (relatively) expensive, and the markers won't change
/// unless the Python executable changes, so we use the executable's last modified
/// time as a cache key.
///
/// A cache entry that cannot be read or deserialized is treated as a cache miss: the
/// markers are detected again and the entry is overwritten.
///
/// # Errors
///
/// Returns an error if detecting the markers fails, or if the fresh markers cannot be
/// serialized or written to the cache.
pub(crate) fn detect_cached_markers(
    executable: &Path,
    cache: Option<&Path>,
) -> Result<MarkerEnvironment> {
    // Read from the cache.
    let key = if let Some(cache) = cache {
        if let Ok(key) = cache_key(executable) {
            if let Ok(data) = cacache::read_sync(cache, &key) {
                match serde_json::from_slice::<MarkerEnvironment>(&data) {
                    Ok(markers) => {
                        debug!("Using cached markers for {}", executable.display());
                        return Ok(markers);
                    }
                    // Stale or corrupt entry: fall through and re-detect instead of failing.
                    Err(err) => {
                        debug!(
                            "Ignoring unreadable cache entry for {}: {}",
                            executable.display(),
                            err
                        );
                    }
                }
            }
            Some(key)
        } else {
            None
        }
    } else {
        None
    };

    // Otherwise, run the Python script.
    debug!("Detecting markers for {}", executable.display());
    let markers = detect_markers(executable)?;

    // Write to the cache.
    if let (Some(cache), Some(key)) = (cache, key) {
        cacache::write_sync(cache, key, serde_json::to_vec(&markers)?)?;
    }

    Ok(markers)
}
/// Build the cache key for a Python executable from its path and its last-modified time
/// (in milliseconds since the Unix epoch).
///
/// # Errors
///
/// Fails if the file metadata or modification time cannot be read, or if the modification
/// time lies before the Unix epoch.
fn cache_key(executable: &Path) -> Result<String> {
    let metadata = executable.metadata()?;
    let modified_at = metadata.modified()?;
    let since_epoch = modified_at.duration_since(std::time::UNIX_EPOCH)?;
    let millis = since_epoch.as_millis();
    Ok(format!("puffin:v0:{}:{}", executable.display(), millis))
}
/// Python source that is passed to the interpreter with `-c`; it writes a JSON object of
/// marker values to stdout.
const CAPTURE_MARKERS_SCRIPT: &str = "
import os
import sys
import platform
import json
def format_full_version(info):
version = '{0.major}.{0.minor}.{0.micro}'.format(info)
kind = info.releaselevel
if kind != 'final':
version += kind[0] + str(info.serial)
return version
if hasattr(sys, 'implementation'):
implementation_version = format_full_version(sys.implementation.version)
implementation_name = sys.implementation.name
else:
implementation_version = '0'
implementation_name = ''
bindings = {
'implementation_name': implementation_name,
'implementation_version': implementation_version,
'os_name': os.name,
'platform_machine': platform.machine(),
'platform_python_implementation': platform.python_implementation(),
'platform_release': platform.release(),
'platform_system': platform.system(),
'platform_version': platform.version(),
'python_full_version': platform.python_version(),
'python_version': '.'.join(platform.python_version_tuple()[:2]),
'sys_platform': sys.platform,
}
json.dump(bindings, sys.stdout)
sys.stdout.flush()
";
/// Run a Python script and return its output.
fn call_python<I, S>(python: &Path, args: I) -> Result<Output>
where
I: IntoIterator<Item = S>,
S: AsRef<OsStr>,
{
Command::new(python)
.args(args)
.output()
.context(format!("Failed to run `python` at: {:?}", &python))
}

View file

@ -6,22 +6,22 @@ use platform_host::{Os, Platform};
/// A Python-aware wrapper around [`Platform`].
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) struct PythonPlatform(Platform);
pub(crate) struct PythonPlatform(pub(crate) Platform);
impl PythonPlatform {
/// Returns the path to the `python` executable inside a virtual environment.
pub(crate) fn venv_python(&self, venv_base: impl AsRef<Path>) -> PathBuf {
pub(crate) fn venv_python(&self, venv_root: impl AsRef<Path>) -> PathBuf {
let python = if matches!(self.0.os(), Os::Windows) {
"python.exe"
} else {
"python"
};
self.venv_bin_dir(venv_base).join(python)
self.venv_bin_dir(venv_root).join(python)
}
/// Returns the directory in which the binaries are stored inside a virtual environment.
pub(crate) fn venv_bin_dir(&self, venv_base: impl AsRef<Path>) -> PathBuf {
let venv = venv_base.as_ref();
pub(crate) fn venv_bin_dir(&self, venv_root: impl AsRef<Path>) -> PathBuf {
let venv = venv_root.as_ref();
if matches!(self.0.os(), Os::Windows) {
let bin_dir = venv.join("Scripts");
if bin_dir.join("python.exe").exists() {
@ -43,10 +43,10 @@ impl PythonPlatform {
/// Returns the path to the `site-packages` directory inside a virtual environment.
pub(crate) fn venv_site_packages(
&self,
venv_base: impl AsRef<Path>,
venv_root: impl AsRef<Path>,
version: (u8, u8),
) -> PathBuf {
let venv = venv_base.as_ref();
let venv = venv_root.as_ref();
if matches!(self.0.os(), Os::Windows) {
venv.join("Lib").join("site-packages")
} else {

View file

@ -1,11 +1,91 @@
use std::env;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use crate::InterpreterInfo;
use anyhow::{bail, Result};
use platform_host::Platform;
use tracing::debug;
use crate::python_platform::PythonPlatform;
/// A Python virtual environment: its root directory plus the metadata of its interpreter.
#[derive(Debug, Clone)]
pub struct Virtualenv {
/// Root directory of the virtual environment.
root: PathBuf,
/// Interpreter metadata associated with this environment.
interpreter_info: InterpreterInfo,
}
impl Virtualenv {
    /// Detect the virtual environment of the host environment and query its interpreter,
    /// using the optional `cache` for the interpreter metadata.
    ///
    /// # Errors
    ///
    /// Fails if no virtual environment can be detected or the interpreter query fails.
    pub fn from_env(platform: Platform, cache: Option<&Path>) -> Result<Self> {
        let platform = PythonPlatform::from(platform);
        let venv = detect_virtual_env(&platform)?;
        let executable = platform.venv_python(&venv);
        let interpreter_info = InterpreterInfo::query_cached(&executable, platform.0, cache)?;
        Ok(Self {
            root: venv,
            interpreter_info,
        })
    }

    /// Build a [`Virtualenv`] for the environment rooted at `root`, querying its
    /// interpreter with the optional `cache`.
    ///
    /// # Errors
    ///
    /// Fails if the interpreter query fails.
    pub fn from_virtualenv(platform: Platform, root: &Path, cache: Option<&Path>) -> Result<Self> {
        let platform = PythonPlatform::from(platform);
        let executable = platform.venv_python(root);
        let interpreter_info = InterpreterInfo::query_cached(&executable, platform.0, cache)?;
        Ok(Self {
            root: root.to_path_buf(),
            interpreter_info,
        })
    }

    /// Build a [`Virtualenv`] rooted at `venv` from already known interpreter metadata,
    /// without running any interpreter.
    ///
    /// The metadata is cloned and its `base_prefix` is replaced with the venv root.
    // NOTE(review): this overwrites `base_prefix`, not a separate `prefix` value; confirm
    // this is the intended field to rewrite for a freshly created venv.
    pub fn new_prefix(venv: &Path, interpreter_info: &InterpreterInfo) -> Self {
        Self {
            root: venv.to_path_buf(),
            interpreter_info: InterpreterInfo {
                base_prefix: venv.to_path_buf(),
                ..interpreter_info.clone()
            },
        }
    }

    /// Returns the location of the python interpreter inside the virtual environment:
    /// `bin/python` on unix and `Scripts/python.exe` on windows.
    pub fn python_executable(&self) -> PathBuf {
        #[cfg(unix)]
        {
            self.root.join("bin").join("python")
        }
        #[cfg(windows)]
        {
            // `root` is a plain `PathBuf` field; there is no tuple field to go through.
            self.root.join("Scripts").join("python.exe")
        }
        #[cfg(not(any(unix, windows)))]
        {
            compile_error!("Only windows and unix (linux, mac os, etc.) are supported")
        }
    }

    /// Returns the root directory of the virtual environment.
    pub fn root(&self) -> &Path {
        &self.root
    }

    /// Returns the interpreter metadata of the virtual environment.
    pub fn interpreter_info(&self) -> &InterpreterInfo {
        &self.interpreter_info
    }

    /// Returns the path to the `site-packages` directory inside a virtual environment.
    pub fn site_packages(&self) -> PathBuf {
        self.interpreter_info
            .platform
            .venv_site_packages(&self.root, self.interpreter_info().simple_version())
    }
}
/// Locate the current virtual environment.
pub(crate) fn detect_virtual_env(target: &PythonPlatform) -> Result<PathBuf> {
match (env::var_os("VIRTUAL_ENV"), env::var_os("CONDA_PREFIX")) {