mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 02:48:17 +00:00
Include environment variables in interpreter info caching (#11601)
We want to use `sys.path` for package discovery (#2500, #9849). For that, we need to know the correct value of `sys.path`. `sys.path` is a runtime-changeable value that is influenced by many different sources: environment variables, CLI arguments, `.pth` files with scripting, `sys.path.append()` at runtime, a distributor patching Python, etc. We cannot capture them all accurately, especially since it's possible to change `sys.path` mid-execution. Instead, we make a best-effort attempt at matching the user's expectation. The assumption is that package installation generally happens in venv site-packages, system/user site-packages (including PyPy shipping packages with the standard library), and `PYTHONPATH`. Specifically, we reuse `PYTHONPATH` as a dedicated way for users to tell uv to include specific directories in package discovery. A common way to influence `sys.path` without using venvs is setting `PYTHONPATH`. To support this, we capture `PYTHONPATH` as part of the cache invalidation, i.e., we refresh the interpreter metadata if it changed. For completeness, we also capture other environment variables documented as influencing `sys.path` or other fields in the interpreter info. This PR does not include reading registry values for `sys.path` additions on Windows as documented in https://docs.python.org/3.11/using/windows.html#finding-modules. Notably, it also does not parse Python CLI arguments; we only consider their environment variable versions for package installation and listing. We could try parsing CLI flags in `uv run python`, but we'd still miss them when Python is launched indirectly through a script, and it's more consistent to only consider uv's own arguments and environment variables, similar to uv's behavior in other places.
This commit is contained in:
parent
f394f72453
commit
da30cc4ec5
4 changed files with 142 additions and 43 deletions
|
@ -1,8 +0,0 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use uv_cache_info::Timestamp;
|
||||
|
||||
/// A cached payload stored together with a timestamp.
///
/// `Data` is the type of the cached payload. The struct derives `Serialize` and
/// `Deserialize` so that the pair can be written to and read back from a cache file.
#[derive(Deserialize, Serialize)]
|
||||
pub struct CachedByTimestamp<Data> {
|
||||
/// The timestamp recorded alongside the payload when the entry was written.
pub timestamp: Timestamp,
|
||||
/// The cached payload itself.
pub data: Data,
|
||||
}
|
|
@ -18,7 +18,6 @@ use uv_fs::{cachedir, directories, LockedFile};
|
|||
use uv_normalize::PackageName;
|
||||
use uv_pypi_types::ResolutionMetadata;
|
||||
|
||||
pub use crate::by_timestamp::CachedByTimestamp;
|
||||
#[cfg(feature = "clap")]
|
||||
pub use crate::cli::CacheArgs;
|
||||
use crate::removal::Remover;
|
||||
|
@ -27,7 +26,6 @@ pub use crate::wheel::WheelCache;
|
|||
use crate::wheel::WheelCacheKind;
|
||||
|
||||
mod archive;
|
||||
mod by_timestamp;
|
||||
#[cfg(feature = "clap")]
|
||||
mod cli;
|
||||
mod removal;
|
||||
|
@ -1034,7 +1032,7 @@ impl CacheBucket {
|
|||
Self::SourceDistributions => "sdists-v8",
|
||||
Self::FlatIndex => "flat-index-v2",
|
||||
Self::Git => "git-v0",
|
||||
Self::Interpreter => "interpreter-v4",
|
||||
Self::Interpreter => "interpreter-v5",
|
||||
// Note that when bumping this, you'll also need to bump it
|
||||
// in `crates/uv/tests/it/cache_clean.rs`.
|
||||
Self::Simple => "simple-v15",
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use std::borrow::Cow;
|
||||
use std::env::consts::ARCH;
|
||||
use std::ffi::OsString;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, ExitStatus};
|
||||
use std::sync::OnceLock;
|
||||
use std::{env, io};
|
||||
|
||||
use configparser::ini::Ini;
|
||||
use fs_err as fs;
|
||||
|
@ -14,7 +15,7 @@ use serde::{Deserialize, Serialize};
|
|||
use thiserror::Error;
|
||||
use tracing::{debug, trace, warn};
|
||||
|
||||
use uv_cache::{Cache, CacheBucket, CachedByTimestamp, Freshness};
|
||||
use uv_cache::{Cache, CacheBucket, CacheEntry, Freshness};
|
||||
use uv_cache_info::Timestamp;
|
||||
use uv_cache_key::cache_digest;
|
||||
use uv_fs::{write_atomic_sync, PythonExt, Simplified};
|
||||
|
@ -24,6 +25,7 @@ use uv_pep508::{MarkerEnvironment, StringVersion};
|
|||
use uv_platform_tags::Platform;
|
||||
use uv_platform_tags::{Tags, TagsError};
|
||||
use uv_pypi_types::{ResolverMarkerEnvironment, Scheme};
|
||||
use uv_static::EnvVars;
|
||||
|
||||
use crate::implementation::LenientImplementationName;
|
||||
use crate::platform::{Arch, Libc, Os};
|
||||
|
@ -713,6 +715,42 @@ pub enum InterpreterInfoError {
|
|||
},
|
||||
}
|
||||
|
||||
/// Environment variables that can change the values of [`InterpreterInfo`].
///
/// A snapshot of these variables is stored with each cached interpreter entry and compared
/// for equality against the current environment when the entry is read; any difference
/// causes the cached entry to be ignored. Each field holds the raw value of the variable,
/// or `None` if the variable is not set.
|
||||
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
|
||||
struct PythonEnvVars {
|
||||
/// `PYTHONHOME` overrides `sys.prefix`.
|
||||
pythonhome: Option<OsString>,
|
||||
/// `PYTHONPATH` adds to `sys.path`.
|
||||
pythonpath: Option<OsString>,
|
||||
/// `PYTHONSAFEPATH` influences `sys.path`.
|
||||
pythonsafepath: Option<OsString>,
|
||||
/// `PYTHONPLATLIBDIR` influences `sys.path`.
|
||||
pythonplatlibdir: Option<OsString>,
|
||||
/// `PYTHONNOUSERSITE` influences `sys.path`.
|
||||
pythonnousersite: Option<OsString>,
|
||||
/// `PYTHONUSERBASE` influences `sys.path`.
|
||||
pythonuserbase: Option<OsString>,
|
||||
/// `APPDATA` influences `sys.path` through the user site packages (Windows).
|
||||
appdata: Option<OsString>,
|
||||
/// `HOME` influences `sys.path` through the user site packages (Unix).
|
||||
home: Option<OsString>,
|
||||
}
|
||||
|
||||
impl PythonEnvVars {
|
||||
fn from_env() -> Self {
|
||||
Self {
|
||||
pythonhome: env::var_os(EnvVars::PYTHONHOME),
|
||||
pythonpath: env::var_os(EnvVars::PYTHONPATH),
|
||||
pythonsafepath: env::var_os(EnvVars::PYTHONSAFEPATH),
|
||||
pythonplatlibdir: env::var_os(EnvVars::PYTHONPLATLIBDIR),
|
||||
pythonnousersite: env::var_os(EnvVars::PYTHONNOUSERSITE),
|
||||
pythonuserbase: env::var_os(EnvVars::PYTHONUSERBASE),
|
||||
appdata: env::var_os(EnvVars::APPDATA),
|
||||
home: env::var_os(EnvVars::HOME),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, Clone)]
|
||||
struct InterpreterInfo {
|
||||
platform: Platform,
|
||||
|
@ -732,6 +770,18 @@ struct InterpreterInfo {
|
|||
gil_disabled: bool,
|
||||
}
|
||||
|
||||
/// The record written to the interpreter cache for a single Python executable.
///
/// Besides the queried [`InterpreterInfo`], it stores the values that are checked when the
/// entry is read back: the executable's modification timestamp and a snapshot of the
/// relevant environment variables. A mismatch in either causes the entry to be ignored.
#[derive(Debug, Deserialize, Serialize, Clone)]
|
||||
struct CachedInterpreterInfo {
|
||||
/// Information about a Python interpreter at a path.
|
||||
data: InterpreterInfo,
|
||||
/// The last modified timestamp of the Python interpreter path.
|
||||
///
|
||||
/// It is ctime on unix.
|
||||
timestamp: Timestamp,
|
||||
/// Snapshot of the environment variables that can influence `data`, taken when the entry
/// was written. The entry is ignored if the current environment no longer matches.
|
||||
env_vars: PythonEnvVars,
|
||||
}
|
||||
|
||||
impl InterpreterInfo {
|
||||
/// Return the resolved [`InterpreterInfo`] for the given Python executable.
|
||||
pub(crate) fn query(interpreter: &Path, cache: &Cache) -> Result<Self, Error> {
|
||||
|
@ -869,36 +919,10 @@ impl InterpreterInfo {
|
|||
})?;
|
||||
|
||||
// Read from the cache.
|
||||
if cache
|
||||
.freshness(&cache_entry, None)
|
||||
.is_ok_and(Freshness::is_fresh)
|
||||
if let Some(value) =
|
||||
Self::read_and_validate_cache(executable, cache, &cache_entry, modified)
|
||||
{
|
||||
if let Ok(data) = fs::read(cache_entry.path()) {
|
||||
match rmp_serde::from_slice::<CachedByTimestamp<Self>>(&data) {
|
||||
Ok(cached) => {
|
||||
if cached.timestamp == modified {
|
||||
trace!(
|
||||
"Cached interpreter info for Python {}, skipping probing: {}",
|
||||
cached.data.markers.python_full_version(),
|
||||
executable.user_display()
|
||||
);
|
||||
return Ok(cached.data);
|
||||
}
|
||||
|
||||
trace!(
|
||||
"Ignoring stale interpreter markers for: {}",
|
||||
executable.user_display()
|
||||
);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Broken interpreter cache entry at {}, removing: {err}",
|
||||
cache_entry.path().user_display()
|
||||
);
|
||||
let _ = fs_err::remove_file(cache_entry.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(value);
|
||||
}
|
||||
|
||||
// Otherwise, run the Python script.
|
||||
|
@ -914,15 +938,82 @@ impl InterpreterInfo {
|
|||
fs::create_dir_all(cache_entry.dir())?;
|
||||
write_atomic_sync(
|
||||
cache_entry.path(),
|
||||
rmp_serde::to_vec(&CachedByTimestamp {
|
||||
rmp_serde::to_vec(&CachedInterpreterInfo {
|
||||
timestamp: modified,
|
||||
data: info.clone(),
|
||||
env_vars: PythonEnvVars::from_env(),
|
||||
})?,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
/// If a cache entry for the Python interpreter exists and it's fresh, return it.
|
||||
fn read_and_validate_cache(
|
||||
executable: &Path,
|
||||
cache: &Cache,
|
||||
cache_entry: &CacheEntry,
|
||||
modified: Timestamp,
|
||||
) -> Option<InterpreterInfo> {
|
||||
if !cache
|
||||
.freshness(cache_entry, None)
|
||||
.is_ok_and(Freshness::is_fresh)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let data = match fs::read(cache_entry.path()) {
|
||||
Ok(data) => data,
|
||||
Err(err) if err.kind() == io::ErrorKind::NotFound => {
|
||||
return None;
|
||||
}
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Broken interpreter cache entry at {}, removing: {err}",
|
||||
cache_entry.path().user_display()
|
||||
);
|
||||
let _ = fs_err::remove_file(cache_entry.path());
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
let cached = match rmp_serde::from_slice::<CachedInterpreterInfo>(&data) {
|
||||
Ok(cached) => cached,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Broken interpreter cache entry at {}, removing: {err}",
|
||||
cache_entry.path().user_display()
|
||||
);
|
||||
let _ = fs_err::remove_file(cache_entry.path());
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
if cached.timestamp != modified {
|
||||
trace!(
|
||||
"Ignoring stale cached interpreter info for: `{}`",
|
||||
executable.user_display()
|
||||
);
|
||||
return None;
|
||||
}
|
||||
|
||||
if cached.env_vars != PythonEnvVars::from_env() {
|
||||
trace!(
|
||||
"Ignoring cached interpreter info due to changed environment variables for: `{}`",
|
||||
executable.user_display()
|
||||
);
|
||||
return None;
|
||||
}
|
||||
|
||||
trace!(
|
||||
"Cached interpreter info for Python {}, skipping probing: `{}`",
|
||||
cached.data.markers.python_full_version(),
|
||||
executable.user_display()
|
||||
);
|
||||
|
||||
Some(cached.data)
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the Python executable that should be considered the "base" for a virtual environment.
|
||||
|
|
|
@ -649,4 +649,22 @@ impl EnvVars {
|
|||
///
|
||||
/// This is a quasi-standard variable, described e.g. in `ncurses(3x)`.
|
||||
pub const COLUMNS: &'static str = "COLUMNS";
|
||||
|
||||
/// Overrides `sys.prefix`.
///
/// uv records this variable with cached interpreter metadata and queries the interpreter
/// again when its value changes.
|
||||
pub const PYTHONHOME: &'static str = "PYTHONHOME";
|
||||
|
||||
/// Don't prepend a potentially unsafe path to `sys.path`.
///
/// uv records this variable with cached interpreter metadata and queries the interpreter
/// again when its value changes.
|
||||
pub const PYTHONSAFEPATH: &'static str = "PYTHONSAFEPATH";
|
||||
|
||||
/// Overrides `sys.platlibdir`.
///
/// uv records this variable with cached interpreter metadata and queries the interpreter
/// again when its value changes.
|
||||
pub const PYTHONPLATLIBDIR: &'static str = "PYTHONPLATLIBDIR";
|
||||
|
||||
/// Don't add the user site packages to `sys.path`.
///
/// uv records this variable with cached interpreter metadata and queries the interpreter
/// again when its value changes.
|
||||
pub const PYTHONNOUSERSITE: &'static str = "PYTHONNOUSERSITE";
|
||||
|
||||
/// Overrides `site.USER_BASE`.
///
/// uv records this variable with cached interpreter metadata and queries the interpreter
/// again when its value changes.
|
||||
pub const PYTHONUSERBASE: &'static str = "PYTHONUSERBASE";
|
||||
|
||||
/// The base path for user site packages on Windows.
///
/// uv records this variable with cached interpreter metadata and queries the interpreter
/// again when its value changes.
|
||||
pub const APPDATA: &'static str = "APPDATA";
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue