mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 10:58:28 +00:00
Patch sysconfig
data at install time (#9857)
## Summary
This PR reimplements
[`sysconfigpatcher`](https://github.com/bluss/sysconfigpatcher) in Rust
and applies it to our Python installations at install-time, ensuring
that the `sysconfig` data is more likely to be correct.
For now, we only rewrite prefixes (i.e., any path that starts with
`/install` gets rewritten to the correct absolute path for the current
machine).
Unlike `sysconfigpatcher`, this PR does not yet do any of the following:
- Patch `pkginfo` files.
- Change `clang` references to `cc`.
A few things that we should do as follow-ups, in my opinion:
1. Rewrite
[`AR`](https://github.com/bluss/sysconfigpatcher/blob/c1ebf8ab9274dcde255484d93ce0f1fd1f76a248/src/sysconfigpatcher.py#L61).
2. Remove `-isysroot`, which we already do for newer builds.
This commit is contained in:
parent
5903ce5759
commit
d2fb4c585d
13 changed files with 817 additions and 19 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -5296,6 +5296,7 @@ dependencies = [
|
|||
"futures",
|
||||
"goblin",
|
||||
"indoc",
|
||||
"insta",
|
||||
"itertools 0.13.0",
|
||||
"owo-colors",
|
||||
"procfs",
|
||||
|
|
|
@ -72,6 +72,7 @@ windows-result = { workspace = true }
|
|||
anyhow = { version = "1.0.89" }
|
||||
assert_fs = { version = "1.1.2" }
|
||||
indoc = { workspace = true }
|
||||
insta = { version = "1.40.0" }
|
||||
itertools = { version = "0.13.0" }
|
||||
temp-env = { version = "0.3.6" }
|
||||
tempfile = { workspace = true }
|
||||
|
|
|
@ -573,7 +573,10 @@ def main() -> None:
|
|||
"sys_executable": sys.executable,
|
||||
"sys_path": sys.path,
|
||||
"stdlib": sysconfig.get_path("stdlib"),
|
||||
"sysconfig_prefix": sysconfig.get_config_var("prefix"),
|
||||
# Prior to the introduction of `sysconfig` patching, python-build-standalone installations would always use
|
||||
# "/install" as the prefix. With `sysconfig` patching, we rewrite the prefix to match the actual installation
|
||||
# location. So in newer versions, we also write a dedicated flag to indicate standalone builds.
|
||||
"standalone": sysconfig.get_config_var("prefix") == "/install" or bool(sysconfig.get_config_var("PYTHON_BUILD_STANDALONE")),
|
||||
"scheme": get_scheme(),
|
||||
"virtualenv": get_virtualenv(),
|
||||
"platform": os_and_arch,
|
||||
|
|
|
@ -2518,7 +2518,7 @@ fn disjunction(items: &[&str]) -> String {
|
|||
fn try_into_u8_slice(release: &[u64]) -> Result<Vec<u8>, std::num::TryFromIntError> {
|
||||
release
|
||||
.iter()
|
||||
.map(|x| match (*x).try_into() {
|
||||
.map(|x| match u8::try_from(*x) {
|
||||
Ok(x) => Ok(x),
|
||||
Err(e) => Err(e),
|
||||
})
|
||||
|
@ -2527,7 +2527,7 @@ fn try_into_u8_slice(release: &[u64]) -> Result<Vec<u8>, std::num::TryFromIntErr
|
|||
|
||||
/// Convert a wheel tag formatted version (e.g., `38`) to multiple components (e.g., `3.8`).
|
||||
///
|
||||
/// The major version is always assumed to be a single digit 0-9. The minor version is all of
|
||||
/// The major version is always assumed to be a single digit 0-9. The minor version is all
|
||||
/// the following content.
|
||||
///
|
||||
/// If not a wheel tag formatted version, the input is returned unchanged.
|
||||
|
|
|
@ -163,6 +163,7 @@ impl PythonInstallation {
|
|||
|
||||
let installed = ManagedPythonInstallation::new(path)?;
|
||||
installed.ensure_externally_managed()?;
|
||||
installed.ensure_sysconfig_patched()?;
|
||||
installed.ensure_canonical_executables()?;
|
||||
|
||||
Ok(Self {
|
||||
|
|
|
@ -46,7 +46,7 @@ pub struct Interpreter {
|
|||
sys_executable: PathBuf,
|
||||
sys_path: Vec<PathBuf>,
|
||||
stdlib: PathBuf,
|
||||
sysconfig_prefix: Option<PathBuf>,
|
||||
standalone: bool,
|
||||
tags: OnceLock<Tags>,
|
||||
target: Option<Target>,
|
||||
prefix: Option<Prefix>,
|
||||
|
@ -80,7 +80,7 @@ impl Interpreter {
|
|||
sys_executable: info.sys_executable,
|
||||
sys_path: info.sys_path,
|
||||
stdlib: info.stdlib,
|
||||
sysconfig_prefix: info.sysconfig_prefix,
|
||||
standalone: info.standalone,
|
||||
tags: OnceLock::new(),
|
||||
target: None,
|
||||
prefix: None,
|
||||
|
@ -368,11 +368,6 @@ impl Interpreter {
|
|||
&self.stdlib
|
||||
}
|
||||
|
||||
/// Return the `prefix` path for this Python interpreter, as returned by `sysconfig.get_config_var("prefix")`.
|
||||
pub fn sysconfig_prefix(&self) -> Option<&Path> {
|
||||
self.sysconfig_prefix.as_deref()
|
||||
}
|
||||
|
||||
/// Return the `purelib` path for this Python interpreter, as returned by `sysconfig.get_paths()`.
|
||||
pub fn purelib(&self) -> &Path {
|
||||
&self.scheme.purelib
|
||||
|
@ -441,8 +436,7 @@ impl Interpreter {
|
|||
///
|
||||
/// See: <https://github.com/indygreg/python-build-standalone/issues/382>
|
||||
pub fn is_standalone(&self) -> bool {
|
||||
self.sysconfig_prefix()
|
||||
.is_some_and(|prefix| prefix == Path::new("/install"))
|
||||
self.standalone
|
||||
}
|
||||
|
||||
/// Return the [`Layout`] environment used to install wheels into this interpreter.
|
||||
|
@ -626,7 +620,7 @@ struct InterpreterInfo {
|
|||
sys_executable: PathBuf,
|
||||
sys_path: Vec<PathBuf>,
|
||||
stdlib: PathBuf,
|
||||
sysconfig_prefix: Option<PathBuf>,
|
||||
standalone: bool,
|
||||
pointer_size: PointerSize,
|
||||
gil_disabled: bool,
|
||||
}
|
||||
|
@ -854,6 +848,7 @@ mod tests {
|
|||
"arch": "x86_64"
|
||||
},
|
||||
"manylinux_compatible": false,
|
||||
"standalone": false,
|
||||
"markers": {
|
||||
"implementation_name": "cpython",
|
||||
"implementation_version": "3.12.0",
|
||||
|
|
|
@ -8,7 +8,7 @@ pub use crate::discovery::{
|
|||
find_python_installations, EnvironmentPreference, Error as DiscoveryError, PythonDownloads,
|
||||
PythonNotFound, PythonPreference, PythonRequest, PythonSource, PythonVariant, VersionRequest,
|
||||
};
|
||||
pub use crate::environment::{InvalidEnvironment, InvalidEnvironmentKind, PythonEnvironment};
|
||||
pub use crate::environment::{InvalidEnvironmentKind, PythonEnvironment};
|
||||
pub use crate::implementation::ImplementationName;
|
||||
pub use crate::installation::{PythonInstallation, PythonInstallationKey};
|
||||
pub use crate::interpreter::{Error as InterpreterError, Interpreter};
|
||||
|
@ -39,6 +39,7 @@ mod prefix;
|
|||
#[cfg(windows)]
|
||||
mod py_launcher;
|
||||
mod python_version;
|
||||
mod sysconfig;
|
||||
mod target;
|
||||
mod version_files;
|
||||
mod virtualenv;
|
||||
|
@ -220,6 +221,7 @@ mod tests {
|
|||
"arch": "x86_64"
|
||||
},
|
||||
"manylinux_compatible": true,
|
||||
"standalone": true,
|
||||
"markers": {
|
||||
"implementation_name": "{IMPLEMENTATION}",
|
||||
"implementation_version": "{FULL_VERSION}",
|
||||
|
|
|
@ -25,7 +25,7 @@ use crate::libc::LibcDetectionError;
|
|||
use crate::platform::Error as PlatformError;
|
||||
use crate::platform::{Arch, Libc, Os};
|
||||
use crate::python_version::PythonVersion;
|
||||
use crate::{PythonRequest, PythonVariant};
|
||||
use crate::{sysconfig, PythonRequest, PythonVariant};
|
||||
#[derive(Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error(transparent)]
|
||||
|
@ -40,6 +40,8 @@ pub enum Error {
|
|||
InvalidPythonVersion(String),
|
||||
#[error(transparent)]
|
||||
ExtractError(#[from] uv_extract::Error),
|
||||
#[error(transparent)]
|
||||
SysconfigError(#[from] sysconfig::Error),
|
||||
#[error("Failed to copy to: {0}", to.user_display())]
|
||||
CopyError {
|
||||
to: PathBuf,
|
||||
|
@ -491,6 +493,21 @@ impl ManagedPythonInstallation {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Ensure that the `sysconfig` data is patched to match the installation path.
|
||||
pub fn ensure_sysconfig_patched(&self) -> Result<(), Error> {
|
||||
if cfg!(unix) {
|
||||
if *self.implementation() == ImplementationName::CPython {
|
||||
sysconfig::update_sysconfig(
|
||||
self.path(),
|
||||
self.key.major,
|
||||
self.key.minor,
|
||||
self.key.variant.suffix(),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a link to the managed Python executable.
|
||||
///
|
||||
/// If the file already exists at the target path, an error will be returned.
|
||||
|
|
148
crates/uv-python/src/sysconfig/cursor.rs
Normal file
148
crates/uv-python/src/sysconfig/cursor.rs
Normal file
|
@ -0,0 +1,148 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
use std::str::Chars;
|
||||
|
||||
pub(super) const EOF_CHAR: char = '\0';
|
||||
|
||||
/// A cursor represents a pointer in the source code.
|
||||
///
|
||||
/// Based on [`rustc`'s `Cursor`](https://github.com/rust-lang/rust/blob/d1b7355d3d7b4ead564dbecb1d240fcc74fff21b/compiler/rustc_lexer/src/cursor.rs)
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) struct Cursor<'src> {
|
||||
/// An iterator over the [`char`]'s of the source code.
|
||||
chars: Chars<'src>,
|
||||
|
||||
/// Stores the previous character for debug assertions.
|
||||
#[cfg(debug_assertions)]
|
||||
prev_char: char,
|
||||
}
|
||||
|
||||
impl<'src> Cursor<'src> {
|
||||
pub(super) fn new(source: &'src str) -> Self {
|
||||
Self {
|
||||
chars: source.chars(),
|
||||
#[cfg(debug_assertions)]
|
||||
prev_char: EOF_CHAR,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the previous character. Useful for debug assertions.
|
||||
#[cfg(debug_assertions)]
|
||||
pub(super) const fn previous(&self) -> char {
|
||||
self.prev_char
|
||||
}
|
||||
|
||||
/// Peeks the next character from the input stream without consuming it.
|
||||
/// Returns [`EOF_CHAR`] if the position is past the end of the file.
|
||||
pub(super) fn first(&self) -> char {
|
||||
self.chars.clone().next().unwrap_or(EOF_CHAR)
|
||||
}
|
||||
|
||||
/// Peeks the second character from the input stream without consuming it.
|
||||
/// Returns [`EOF_CHAR`] if the position is past the end of the file.
|
||||
pub(super) fn second(&self) -> char {
|
||||
let mut chars = self.chars.clone();
|
||||
chars.next();
|
||||
chars.next().unwrap_or(EOF_CHAR)
|
||||
}
|
||||
|
||||
/// Returns the remaining text to lex.
|
||||
///
|
||||
/// Use [`Cursor::text_len`] to get the length of the remaining text.
|
||||
pub(super) fn rest(&self) -> &'src str {
|
||||
self.chars.as_str()
|
||||
}
|
||||
|
||||
/// Returns `true` if the cursor is at the end of file.
|
||||
pub(super) fn is_eof(&self) -> bool {
|
||||
self.chars.as_str().is_empty()
|
||||
}
|
||||
|
||||
/// Moves the cursor to the next character, returning the previous character.
|
||||
/// Returns [`None`] if there is no next character.
|
||||
pub(super) fn bump(&mut self) -> Option<char> {
|
||||
let prev = self.chars.next()?;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
self.prev_char = prev;
|
||||
}
|
||||
|
||||
Some(prev)
|
||||
}
|
||||
|
||||
pub(super) fn eat_char(&mut self, c: char) -> bool {
|
||||
if self.first() == c {
|
||||
self.bump();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn eat_char2(&mut self, c1: char, c2: char) -> bool {
|
||||
let mut chars = self.chars.clone();
|
||||
if chars.next() == Some(c1) && chars.next() == Some(c2) {
|
||||
self.bump();
|
||||
self.bump();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn eat_char3(&mut self, c1: char, c2: char, c3: char) -> bool {
|
||||
let mut chars = self.chars.clone();
|
||||
if chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3) {
|
||||
self.bump();
|
||||
self.bump();
|
||||
self.bump();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn eat_if<F>(&mut self, mut predicate: F) -> Option<char>
|
||||
where
|
||||
F: FnMut(char) -> bool,
|
||||
{
|
||||
if predicate(self.first()) && !self.is_eof() {
|
||||
self.bump()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Eats symbols while predicate returns true or until the end of file is reached.
|
||||
#[inline]
|
||||
pub(super) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
|
||||
// It was tried making optimized version of this for eg. line comments, but
|
||||
// LLVM can inline all of this and compile it down to fast iteration over bytes.
|
||||
while predicate(self.first()) && !self.is_eof() {
|
||||
self.bump();
|
||||
}
|
||||
}
|
||||
|
||||
/// Skips the next `count` bytes.
|
||||
///
|
||||
/// ## Panics
|
||||
/// - If `count` is larger than the remaining bytes in the input stream.
|
||||
/// - If `count` indexes into a multi-byte character.
|
||||
pub(super) fn skip_bytes(&mut self, count: usize) {
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
self.prev_char = self.chars.as_str()[..count]
|
||||
.chars()
|
||||
.next_back()
|
||||
.unwrap_or('\0');
|
||||
}
|
||||
|
||||
self.chars = self.chars.as_str()[count..].chars();
|
||||
}
|
||||
|
||||
/// Skips to the end of the input stream.
|
||||
pub(super) fn skip_to_end(&mut self) {
|
||||
self.chars = "".chars();
|
||||
}
|
||||
}
|
213
crates/uv-python/src/sysconfig/mod.rs
Normal file
213
crates/uv-python/src/sysconfig/mod.rs
Normal file
|
@ -0,0 +1,213 @@
|
|||
//! Patch `sysconfig` data in a Python installation.
|
||||
//!
|
||||
//! Inspired by: <https://github.com/bluss/sysconfigpatcher/blob/c1ebf8ab9274dcde255484d93ce0f1fd1f76a248/src/sysconfigpatcher.py#L137C1-L140C100>,
|
||||
//! available under the MIT license:
|
||||
//!
|
||||
//! ```text
|
||||
//! Copyright 2024 Ulrik Sverdrup "bluss"
|
||||
//!
|
||||
//! Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
//! this software and associated documentation files (the "Software"), to deal in
|
||||
//! the Software without restriction, including without limitation the rights to
|
||||
//! use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
//! the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
//! subject to the following conditions:
|
||||
//!
|
||||
//! The above copyright notice and this permission notice shall be included in all
|
||||
//! copies or substantial portions of the Software.
|
||||
//!
|
||||
//! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
//! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
//! FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
//! COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
//! IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
//! CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
//! ```
|
||||
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
|
||||
use tracing::trace;
|
||||
|
||||
use crate::sysconfig::parser::{Error as ParseError, SysconfigData, Value};
|
||||
|
||||
mod cursor;
|
||||
mod parser;
|
||||
|
||||
/// Update the `sysconfig` data in a Python installation.
|
||||
pub(crate) fn update_sysconfig(
|
||||
install_root: &Path,
|
||||
major: u8,
|
||||
minor: u8,
|
||||
suffix: &str,
|
||||
) -> Result<(), Error> {
|
||||
// Find the `_sysconfigdata_` file in the Python installation.
|
||||
let real_prefix = std::path::absolute(install_root)?;
|
||||
let sysconfigdata = find_sysconfigdata(&real_prefix, major, minor, suffix)?;
|
||||
trace!(
|
||||
"Discovered `sysconfig` data at: {}",
|
||||
sysconfigdata.display()
|
||||
);
|
||||
|
||||
// Update the `_sysconfigdata_` file in-memory.
|
||||
let contents = fs_err::read_to_string(&sysconfigdata)?;
|
||||
let data = SysconfigData::from_str(&contents)?;
|
||||
let data = patch_sysconfigdata(data, &real_prefix);
|
||||
let contents = data.to_string_pretty()?;
|
||||
|
||||
// Write the updated `_sysconfigdata_` file.
|
||||
let mut file = fs_err::OpenOptions::new()
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.create(true)
|
||||
.open(&sysconfigdata)?;
|
||||
file.write_all(contents.as_bytes())?;
|
||||
file.sync_data()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find the `_sysconfigdata_` file in a Python installation.
|
||||
///
|
||||
/// For example, on macOS, returns `{real_prefix}/lib/python3.12/_sysconfigdata__darwin_darwin.py"`.
|
||||
fn find_sysconfigdata(
|
||||
real_prefix: &Path,
|
||||
major: u8,
|
||||
minor: u8,
|
||||
suffix: &str,
|
||||
) -> Result<PathBuf, Error> {
|
||||
// Find the `lib` directory in the Python installation.
|
||||
let lib = real_prefix
|
||||
.join("lib")
|
||||
.join(format!("python{major}.{minor}{suffix}"));
|
||||
if !lib.exists() {
|
||||
return Err(Error::MissingLib);
|
||||
}
|
||||
|
||||
// Probe the `lib` directory for `_sysconfigdata_`.
|
||||
for entry in lib.read_dir()? {
|
||||
let entry = entry?;
|
||||
|
||||
if entry.path().extension().is_none_or(|ext| ext != "py") {
|
||||
continue;
|
||||
}
|
||||
|
||||
if !entry
|
||||
.path()
|
||||
.file_stem()
|
||||
.and_then(|stem| stem.to_str())
|
||||
.is_some_and(|stem| stem.starts_with("_sysconfigdata_"))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
let metadata = entry.metadata()?;
|
||||
if metadata.is_symlink() {
|
||||
continue;
|
||||
};
|
||||
|
||||
if metadata.is_file() {
|
||||
return Ok(entry.path());
|
||||
}
|
||||
}
|
||||
|
||||
Err(Error::MissingSysconfigdata)
|
||||
}
|
||||
|
||||
/// Patch the given `_sysconfigdata_` contents.
|
||||
fn patch_sysconfigdata(mut data: SysconfigData, real_prefix: &Path) -> SysconfigData {
|
||||
/// Update the `/install` prefix in a whitespace-separated string.
|
||||
fn update_prefix(s: &str, real_prefix: &Path) -> String {
|
||||
s.split_whitespace()
|
||||
.map(|part| {
|
||||
if let Some(rest) = part.strip_prefix("/install") {
|
||||
if rest.is_empty() {
|
||||
real_prefix.display().to_string()
|
||||
} else {
|
||||
real_prefix.join(&rest[1..]).display().to_string()
|
||||
}
|
||||
} else {
|
||||
part.to_string()
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
}
|
||||
|
||||
// Patch each value, as needed.
|
||||
let mut count = 0;
|
||||
for (key, value) in data.iter_mut() {
|
||||
let Value::String(value) = value else {
|
||||
continue;
|
||||
};
|
||||
let patched = update_prefix(value, real_prefix);
|
||||
if *value != patched {
|
||||
trace!("Updated `{key}` from `{value}` to `{patched}`");
|
||||
count += 1;
|
||||
*value = patched;
|
||||
}
|
||||
}
|
||||
|
||||
match count {
|
||||
0 => trace!("No updates required"),
|
||||
1 => trace!("Updated 1 value"),
|
||||
n => trace!("Updated {n} values"),
|
||||
}
|
||||
|
||||
// Mark the Python installation as standalone.
|
||||
data.insert("PYTHON_BUILD_STANDALONE".to_string(), Value::Int(1));
|
||||
|
||||
data
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error(transparent)]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("Python installation is missing a `lib` directory")]
|
||||
MissingLib,
|
||||
#[error("Python installation is missing a `_sysconfigdata_` file")]
|
||||
MissingSysconfigdata,
|
||||
#[error(transparent)]
|
||||
Parse(#[from] ParseError),
|
||||
#[error(transparent)]
|
||||
Json(#[from] serde_json::Error),
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(unix)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn update_real_prefix() -> Result<(), Error> {
|
||||
let sysconfigdata = [
|
||||
("BASEMODLIBS", ""),
|
||||
("BUILDPYTHON", "python.exe"),
|
||||
("prefix", "/install/prefix"),
|
||||
("exec_prefix", "/install/exec_prefix"),
|
||||
("base", "/install/base"),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
|
||||
.collect::<SysconfigData>();
|
||||
|
||||
let real_prefix = Path::new("/real/prefix");
|
||||
let data = patch_sysconfigdata(sysconfigdata, real_prefix);
|
||||
|
||||
insta::assert_snapshot!(data.to_string_pretty()?, @r###"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"BASEMODLIBS": "",
|
||||
"BUILDPYTHON": "python.exe",
|
||||
"PYTHON_BUILD_STANDALONE": 1,
|
||||
"base": "/real/prefix/base",
|
||||
"exec_prefix": "/real/prefix/exec_prefix",
|
||||
"prefix": "/real/prefix/prefix"
|
||||
}
|
||||
"###);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
410
crates/uv-python/src/sysconfig/parser.rs
Normal file
410
crates/uv-python/src/sysconfig/parser.rs
Normal file
|
@ -0,0 +1,410 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::Serialize;
|
||||
use serde_json::ser::PrettyFormatter;
|
||||
|
||||
use crate::sysconfig::cursor::{Cursor, EOF_CHAR};
|
||||
|
||||
/// A value in the [`SysconfigData`] map.
|
||||
///
|
||||
/// Values are assumed to be either strings or integers.
|
||||
#[derive(Debug, Clone, Eq, PartialEq, serde::Serialize)]
|
||||
#[serde(untagged)]
|
||||
pub(super) enum Value {
|
||||
String(String),
|
||||
Int(i32),
|
||||
}
|
||||
|
||||
/// The data extracted from a `_sysconfigdata_` file.
|
||||
#[derive(Debug, Clone, Eq, PartialEq, serde::Serialize)]
|
||||
pub(super) struct SysconfigData(BTreeMap<String, Value>);
|
||||
|
||||
impl SysconfigData {
|
||||
/// Returns an iterator over the key-value pairs in the map.
|
||||
pub(super) fn iter_mut(&mut self) -> std::collections::btree_map::IterMut<String, Value> {
|
||||
self.0.iter_mut()
|
||||
}
|
||||
|
||||
/// Inserts a key-value pair into the map.
|
||||
pub(super) fn insert(&mut self, key: String, value: Value) -> Option<Value> {
|
||||
self.0.insert(key, value)
|
||||
}
|
||||
|
||||
/// Formats the `sysconfig` data as a pretty-printed string.
|
||||
pub(super) fn to_string_pretty(&self) -> Result<String, serde_json::Error> {
|
||||
let output = {
|
||||
let mut buf = Vec::new();
|
||||
let mut serializer = serde_json::Serializer::with_formatter(
|
||||
&mut buf,
|
||||
PrettyFormatter::with_indent(b" "),
|
||||
);
|
||||
self.0.serialize(&mut serializer)?;
|
||||
String::from_utf8(buf).unwrap()
|
||||
};
|
||||
Ok(format!(
|
||||
"# system configuration generated and used by the sysconfig module\nbuild_time_vars = {output}\n",
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for SysconfigData {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let output = {
|
||||
let mut buf = Vec::new();
|
||||
let mut serializer = serde_json::Serializer::new(&mut buf);
|
||||
self.0.serialize(&mut serializer).unwrap();
|
||||
String::from_utf8(buf).unwrap()
|
||||
};
|
||||
write!(f, "{output}",)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromIterator<(String, Value)> for SysconfigData {
|
||||
fn from_iter<T: IntoIterator<Item = (String, Value)>>(iter: T) -> Self {
|
||||
Self(iter.into_iter().collect())
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the `_sysconfigdata_` file (e.g., `{real_prefix}/lib/python3.12/_sysconfigdata__darwin_darwin.py"`
|
||||
/// on macOS).
|
||||
///
|
||||
/// `_sysconfigdata_` is structured as follows:
|
||||
///
|
||||
/// 1. A comment on the first line (e.g., `# system configuration generated and used by the sysconfig module`).
|
||||
/// 2. An assignment to `build_time_vars` (e.g., `build_time_vars = { ... }`).
|
||||
///
|
||||
/// The right-hand side of the assignment is a JSON object. The keys are strings, and the values
|
||||
/// are strings or numbers.
|
||||
impl FromStr for SysconfigData {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
// Read the first line of the file.
|
||||
let Some(s) =
|
||||
s.strip_prefix("# system configuration generated and used by the sysconfig module\n")
|
||||
else {
|
||||
return Err(Error::MissingHeader);
|
||||
};
|
||||
|
||||
// Read the assignment to `build_time_vars`.
|
||||
let Some(s) = s.strip_prefix("build_time_vars") else {
|
||||
return Err(Error::MissingAssignment);
|
||||
};
|
||||
|
||||
let mut cursor = Cursor::new(s);
|
||||
|
||||
cursor.eat_while(is_python_whitespace);
|
||||
if !cursor.eat_char('=') {
|
||||
return Err(Error::MissingAssignment);
|
||||
}
|
||||
cursor.eat_while(is_python_whitespace);
|
||||
|
||||
if !cursor.eat_char('{') {
|
||||
return Err(Error::MissingOpenBrace);
|
||||
}
|
||||
|
||||
let mut map = BTreeMap::new();
|
||||
loop {
|
||||
match cursor.first() {
|
||||
'\'' | '"' => {
|
||||
// Parse key.
|
||||
let key = parse_string(&mut cursor)?;
|
||||
|
||||
cursor.eat_while(is_python_whitespace);
|
||||
cursor.eat_char(':');
|
||||
cursor.eat_while(is_python_whitespace);
|
||||
|
||||
// Parse value
|
||||
let value = match cursor.first() {
|
||||
'\'' | '"' => Value::String(parse_concatenated_string(&mut cursor)?),
|
||||
'-' => {
|
||||
cursor.bump();
|
||||
Value::Int(-parse_int(&mut cursor)?)
|
||||
}
|
||||
c if c.is_ascii_digit() => Value::Int(parse_int(&mut cursor)?),
|
||||
c => return Err(Error::UnexpectedCharacter(c)),
|
||||
};
|
||||
|
||||
// Insert into map.
|
||||
map.insert(key, value);
|
||||
|
||||
// Skip optional comma.
|
||||
cursor.eat_while(is_python_whitespace);
|
||||
cursor.eat_char(',');
|
||||
cursor.eat_while(is_python_whitespace);
|
||||
}
|
||||
|
||||
// Skip whitespace.
|
||||
' ' | '\n' | '\r' | '\t' => {
|
||||
cursor.bump();
|
||||
}
|
||||
|
||||
// When we see a closing brace, we're done.
|
||||
'}' => {
|
||||
cursor.bump();
|
||||
break;
|
||||
}
|
||||
|
||||
c => return Err(Error::UnexpectedCharacter(c)),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self(map))
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a Python string literal.
|
||||
fn parse_string(cursor: &mut Cursor) -> Result<String, Error> {
|
||||
let quote = cursor.bump().expect("Expected opening quote");
|
||||
assert!(quote == '\'' || quote == '"', "Invalid quote character");
|
||||
|
||||
let mut result = String::new();
|
||||
loop {
|
||||
if cursor.first() == EOF_CHAR {
|
||||
return Err(Error::UnexpectedCharacter(EOF_CHAR));
|
||||
}
|
||||
|
||||
// Handle escaped quotes.
|
||||
if cursor.first() == '\\' {
|
||||
// Consume the backslash.
|
||||
cursor.bump();
|
||||
if cursor.first() == quote {
|
||||
result.push(quote);
|
||||
cursor.bump();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Keep the backslash and following character.
|
||||
result.push('\\');
|
||||
result.push(cursor.first());
|
||||
cursor.bump();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Consume closing quote.
|
||||
if cursor.first() == quote {
|
||||
cursor.bump();
|
||||
break;
|
||||
}
|
||||
|
||||
result.push(cursor.first());
|
||||
cursor.bump();
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Parse a Python string, which may be a concatenation of multiple string literals.
|
||||
fn parse_concatenated_string(cursor: &mut Cursor) -> Result<String, Error> {
|
||||
let mut result = String::new();
|
||||
loop {
|
||||
let c = cursor.first();
|
||||
if c == EOF_CHAR {
|
||||
break;
|
||||
}
|
||||
if c == '\'' || c == '"' {
|
||||
// Parse a new string fragment and append it.
|
||||
result.push_str(&parse_string(cursor)?);
|
||||
} else if is_python_whitespace(c) {
|
||||
// Skip whitespace between fragments
|
||||
cursor.bump();
|
||||
} else if c == ',' || c == '}' {
|
||||
// End of value.
|
||||
break;
|
||||
} else {
|
||||
return Err(Error::UnexpectedCharacter(c));
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Parse an integer literal.
|
||||
fn parse_int(cursor: &mut Cursor) -> Result<i32, std::num::ParseIntError> {
|
||||
let mut result = String::new();
|
||||
loop {
|
||||
let c = cursor.first();
|
||||
if c == EOF_CHAR {
|
||||
break;
|
||||
}
|
||||
if !c.is_ascii_digit() {
|
||||
break;
|
||||
}
|
||||
result.push(c);
|
||||
cursor.bump();
|
||||
}
|
||||
result.parse()
|
||||
}
|
||||
|
||||
/// Returns `true` if `c` is a Python
/// [whitespace](https://docs.python.org/3/reference/lexical_analysis.html#whitespace-between-tokens)
/// character as recognized by this parser: space, tab, form-feed, newline, or carriage return.
const fn is_python_whitespace(c: char) -> bool {
    c == ' ' || c == '\t' || c == '\x0C' || c == '\n' || c == '\r'
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("Missing opening brace")]
|
||||
MissingOpenBrace,
|
||||
#[error("Unexpected character: {0}")]
|
||||
UnexpectedCharacter(char),
|
||||
#[error("Failed to parse integer")]
|
||||
ParseInt(#[from] std::num::ParseIntError),
|
||||
#[error("`_sysconfigdata_` is missing a header comment")]
|
||||
MissingHeader,
|
||||
#[error("`_sysconfigdata_` is missing an assignment to `build_time_vars`")]
|
||||
MissingAssignment,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_parse_string() {
|
||||
let input = indoc::indoc!(
|
||||
r#"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": "value1",
|
||||
"key2": 42,
|
||||
"key3": "multi-part" " string"
|
||||
}
|
||||
"#
|
||||
);
|
||||
|
||||
let result = input.parse::<SysconfigData>().expect("Parsing failed");
|
||||
let snapshot = result.to_string_pretty().unwrap();
|
||||
|
||||
insta::assert_snapshot!(snapshot, @r###"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": "value1",
|
||||
"key2": 42,
|
||||
"key3": "multi-part string"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_integer_values() {
|
||||
let input = indoc::indoc!(
|
||||
r#"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": 12345,
|
||||
"key2": -15
|
||||
}
|
||||
"#
|
||||
);
|
||||
|
||||
let result = input.parse::<SysconfigData>().expect("Parsing failed");
|
||||
let snapshot = result.to_string_pretty().unwrap();
|
||||
|
||||
insta::assert_snapshot!(snapshot, @r###"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": 12345,
|
||||
"key2": -15
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_escaped_quotes() {
|
||||
let input = indoc::indoc!(
|
||||
r#"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": "value with \"escaped quotes\"",
|
||||
"key2": 'single-quoted \'escaped\''
|
||||
}
|
||||
"#
|
||||
);
|
||||
|
||||
let result = input.parse::<SysconfigData>().expect("Parsing failed");
|
||||
let snapshot = result.to_string_pretty().unwrap();
|
||||
|
||||
insta::assert_snapshot!(snapshot, @r###"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": "value with \"escaped quotes\"",
|
||||
"key2": "single-quoted 'escaped'"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_concatenated_strings() {
|
||||
let input = indoc::indoc!(
|
||||
r#"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": "multi-"
|
||||
"line "
|
||||
"string"
|
||||
}
|
||||
"#
|
||||
);
|
||||
|
||||
let result = input.parse::<SysconfigData>().expect("Parsing failed");
|
||||
let snapshot = result.to_string_pretty().unwrap();
|
||||
|
||||
insta::assert_snapshot!(snapshot, @r###"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": "multi-line string"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_header_error() {
|
||||
let input = indoc::indoc!(
|
||||
r#"
|
||||
build_time_vars = {
|
||||
"key1": "value1"
|
||||
}
|
||||
"#
|
||||
);
|
||||
|
||||
let result = input.parse::<SysconfigData>();
|
||||
assert!(matches!(result, Err(Error::MissingHeader)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_assignment_error() {
|
||||
let input = indoc::indoc!(
|
||||
r#"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
{
|
||||
"key1": "value1"
|
||||
}
|
||||
"#
|
||||
);
|
||||
|
||||
let result = input.parse::<SysconfigData>();
|
||||
assert!(matches!(result, Err(Error::MissingAssignment)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unexpected_character_error() {
|
||||
let input = indoc::indoc!(
|
||||
r#"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": &123
|
||||
}
|
||||
"#
|
||||
);
|
||||
|
||||
let result = input.parse::<SysconfigData>();
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"Expected parsing to fail due to unexpected character"
|
||||
);
|
||||
}
|
||||
}
|
|
@ -76,6 +76,7 @@ pub(crate) fn create(
|
|||
base_executable,
|
||||
interpreter.python_major(),
|
||||
interpreter.python_minor(),
|
||||
interpreter.variant().suffix(),
|
||||
) {
|
||||
Ok(path) => path,
|
||||
Err(err) => {
|
||||
|
@ -654,7 +655,12 @@ fn copy_launcher_windows(
|
|||
/// environments.
|
||||
///
|
||||
/// See: <https://github.com/python/cpython/blob/a03efb533a58fd13fb0cc7f4a5c02c8406a407bd/Modules/getpath.py#L591-L594>
|
||||
fn find_base_python(executable: &Path, major: u8, minor: u8) -> Result<PathBuf, io::Error> {
|
||||
fn find_base_python(
|
||||
executable: &Path,
|
||||
major: u8,
|
||||
minor: u8,
|
||||
suffix: &str,
|
||||
) -> Result<PathBuf, io::Error> {
|
||||
/// Returns `true` if `path` is the root directory.
|
||||
fn is_root(path: &Path) -> bool {
|
||||
let mut components = path.components();
|
||||
|
@ -664,12 +670,12 @@ fn find_base_python(executable: &Path, major: u8, minor: u8) -> Result<PathBuf,
|
|||
/// Determining whether `dir` is a valid Python prefix by searching for a "landmark".
|
||||
///
|
||||
/// See: <https://github.com/python/cpython/blob/a03efb533a58fd13fb0cc7f4a5c02c8406a407bd/Modules/getpath.py#L183>
|
||||
fn is_prefix(dir: &Path, major: u8, minor: u8) -> bool {
|
||||
fn is_prefix(dir: &Path, major: u8, minor: u8, suffix: &str) -> bool {
|
||||
if cfg!(windows) {
|
||||
dir.join("Lib").join("os.py").is_file()
|
||||
} else {
|
||||
dir.join("lib")
|
||||
.join(format!("python{major}.{minor}"))
|
||||
.join(format!("python{major}.{minor}{suffix}"))
|
||||
.join("os.py")
|
||||
.is_file()
|
||||
}
|
||||
|
@ -685,7 +691,7 @@ fn find_base_python(executable: &Path, major: u8, minor: u8) -> Result<PathBuf,
|
|||
|
||||
// Determine whether this executable will produce a valid `home` for a virtual environment.
|
||||
for prefix in executable.ancestors().take_while(|path| !is_root(path)) {
|
||||
if is_prefix(prefix, major, minor) {
|
||||
if is_prefix(prefix, major, minor, suffix) {
|
||||
return Ok(executable.into_owned());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -310,6 +310,7 @@ pub(crate) async fn install(
|
|||
// installations that match the request
|
||||
for installation in &installations {
|
||||
installation.ensure_externally_managed()?;
|
||||
installation.ensure_sysconfig_patched()?;
|
||||
installation.ensure_canonical_executables()?;
|
||||
|
||||
if preview.is_disabled() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue