From 1c7c174bc80e9209bb52f975743c0cf55bc78e6d Mon Sep 17 00:00:00 2001 From: Zanie Blue Date: Mon, 30 Jun 2025 10:39:47 -0500 Subject: [PATCH] Include the canonical path in the interpreter query cache key (#14331) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes an obscure cache collision in Python interpreter queries, which we believe to be the root cause of CI flakes we've been seeing where a project environment is invalidated and recreated. This work follows from the logs in [this CI run](https://github.com/astral-sh/uv/actions/runs/15934322410/job/44950599993?pr=14326) which captured one of the flakes with tracing enabled. There, we can see that the project environment is invalidated because the Python interpreter in the environment has a different version than expected: ``` DEBUG Checking for Python environment at `.venv` TRACE Cached interpreter info for Python 3.12.9, skipping probing: .venv/bin/python3 DEBUG The interpreter in the project environment has different version (3.12.9) than it was created with (3.9.21) ``` (this message is updated to reflect #14329) The flow is roughly: - We create an environment with 3.12.9 - We query the environment, and cache the interpreter version for `.venv/bin/python` - We create an environment for 3.9.12, replacing the existing one - We query the environment, and read the cached information The Python cache entries are keyed by the absolute path to the interpreter, and rely on the modification time (ctime, nsec resolution) of the canonicalized path to determine if the cache entry should be invalidated. The key is a hex representation of a u64 sea hasher output — which is very unlikely to collide. After an audit of the Python query caching logic, we determined that the most likely cause of a collision in cache entries is that the modification times of underlying interpreters are identical. This seems pretty feasible, especially if the file system does not support nanosecond precision — though it appears that the GitHub runners do support it. The fix here is to include the canonicalized path in the cache key, which ensures we're looking at the modification time of the _same_ underlying interpreter. This will "invalidate" all existing interpreter cache entries but that's not a big deal. This should also have the effect of reducing cache churn for interpreters in virtual environments. Now, when you change Python versions, we won't invalidate the previous cache entry so if you change _back_ to the old version we can re-use our cached information. It's a bit speculative, since we don't have a deterministic reproduction in CI, but this is the strongest candidate given the logs and should increase correctness regardless. Closes https://github.com/astral-sh/uv/issues/14160 Closes https://github.com/astral-sh/uv/issues/13744 Closes https://github.com/astral-sh/uv/issues/13745 Once it's confirmed the flakes are resolved, we should revert - https://github.com/astral-sh/uv/pull/14275 - #13817 --- crates/uv-python/src/interpreter.rs | 55 +++++++++++++++++------------ 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/crates/uv-python/src/interpreter.rs b/crates/uv-python/src/interpreter.rs index df27b497b..0f074ebb6 100644 --- a/crates/uv-python/src/interpreter.rs +++ b/crates/uv-python/src/interpreter.rs @@ -967,6 +967,31 @@ impl InterpreterInfo { pub(crate) fn query_cached(executable: &Path, cache: &Cache) -> Result { let absolute = std::path::absolute(executable)?; + // Provide a better error message if the link is broken or the file does not exist. Since + // `canonicalize_executable` does not resolve the file on Windows, we must re-use this logic + // for the subsequent metadata read as we may not have actually resolved the path. + let handle_io_error = |err: io::Error| -> Error { + if err.kind() == io::ErrorKind::NotFound { + // Check if it looks like a venv interpreter where the underlying Python + // installation was removed. + if absolute + .symlink_metadata() + .is_ok_and(|metadata| metadata.is_symlink()) + { + Error::BrokenSymlink(BrokenSymlink { + path: executable.to_path_buf(), + venv: uv_fs::is_virtualenv_executable(executable), + }) + } else { + Error::NotFound(executable.to_path_buf()) + } + } else { + err.into() + } + }; + + let canonical = canonicalize_executable(&absolute).map_err(handle_io_error)?; + let cache_entry = cache.entry( CacheBucket::Interpreter, // Shard interpreter metadata by host architecture, operating system, and version, to @@ -978,33 +1003,17 @@ impl InterpreterInfo { )), // We use the absolute path for the cache entry to avoid cache collisions for relative // paths. But we don't want to query the executable with symbolic links resolved because - // that can change reported values, e.g., `sys.executable`. - format!("{}.msgpack", cache_digest(&absolute)), + // that can change reported values, e.g., `sys.executable`. We include the canonical + // path in the cache entry as well, otherwise we can have cache collisions if an + // absolute path refers to different interpreters with matching ctimes, e.g., if you + // have a `.venv/bin/python` pointing to both Python 3.12 and Python 3.13 that were + // modified at the same time. + format!("{}.msgpack", cache_digest(&(&absolute, &canonical))), ); // We check the timestamp of the canonicalized executable to check if an underlying // interpreter has been modified. - let modified = canonicalize_executable(&absolute) - .and_then(Timestamp::from_path) - .map_err(|err| { - if err.kind() == io::ErrorKind::NotFound { - // Check if it looks like a venv interpreter where the underlying Python - // installation was removed. - if absolute - .symlink_metadata() - .is_ok_and(|metadata| metadata.is_symlink()) - { - Error::BrokenSymlink(BrokenSymlink { - path: executable.to_path_buf(), - venv: uv_fs::is_virtualenv_executable(executable), - }) - } else { - Error::NotFound(executable.to_path_buf()) - } - } else { - err.into() - } - })?; + let modified = Timestamp::from_path(canonical).map_err(handle_io_error)?; // Read from the cache. if cache