improve Python environment activation (#118)

This commit is contained in:
Josh Thomas 2025-04-30 12:34:20 -05:00 committed by GitHub
parent 3b7ffe0b70
commit c09d6541ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 669 additions and 290 deletions

View file

@ -27,7 +27,7 @@ and this project attempts to adhere to [Semantic Versioning](https://semver.org/
### Changed
- **Internal**: Moved task queueing functionality to `djls-server` crate, renamed from `Worker` to `Queue`, and simplified API.
- Improved Python environment detection.
- **Internal**: Improved Python environment handling, including refactored activation logic.
## [5.2.0a0]

View file

@ -13,8 +13,10 @@ anyhow = "1.0"
async-trait = "0.1"
pyo3 = "0.24"
pyo3-async-runtimes = "0.24"
pyo3-build-config = "0.24"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tempfile = "3.19"
thiserror = "2.0"
tokio = { version = "1.42", features = ["full"] }
tower-lsp-server = { version = "0.21", features = ["proposed"] }

View file

@ -4,10 +4,13 @@ version = "0.1.0"
edition = "2021"
[dependencies]
pyo3 = { workspace = true }
pyo3 = { workspace = true, features = ["auto-initialize"] }
tower-lsp-server = { workspace = true, features = ["proposed"] }
which = "7.0.1"
[build-dependencies]
pyo3-build-config = { workspace = true, features = ["resolve-config"] }
[dev-dependencies]
tempfile = "3.19"
tempfile = { workspace = true }

View file

@ -0,0 +1,45 @@
/// Build script to configure linking against the Python library.
///
/// This script is necessary when the crate's code, particularly tests
/// that interact with the Python interpreter via `pyo3` (e.g., using
/// `Python::with_gil`, importing modules, calling Python functions),
/// needs symbols from the Python C API at link time.
///
/// It uses `pyo3-build-config` to detect the Python installation and
/// prints the required `cargo:rustc-link-search` and `cargo:rustc-link-lib`
/// directives to Cargo, enabling the linker to find and link against the
/// appropriate Python library (e.g., libpythonX.Y.so).
///
/// It also adds an RPATH linker argument on Unix-like systems so the
/// resulting test executable can find the Python library at runtime.
///
/// Note: Each crate whose test target requires Python linking needs its
/// own `build.rs` with this logic.
fn main() {
println!("cargo:rerun-if-changed=build.rs");
// Set up #[cfg] flags first (useful for conditional compilation)
pyo3_build_config::use_pyo3_cfgs();
// Get the Python interpreter configuration directly
let config = pyo3_build_config::get();
// Add the library search path if available
if let Some(lib_dir) = &config.lib_dir {
println!("cargo:rustc-link-search=native={}", lib_dir);
// Add RPATH linker argument for Unix-like systems (Linux, macOS)
// This helps the test executable find the Python library at runtime.
#[cfg(not(windows))]
println!("cargo:rustc-link-arg=-Wl,-rpath,{}", lib_dir);
} else {
println!("cargo:warning=Python library directory not found in config.");
}
// Add the library link directive if available
if let Some(lib_name) = &config.lib_name {
println!("cargo:rustc-link-lib=dylib={}", lib_name);
} else {
println!("cargo:warning=Python library name not found in config.");
}
}

View file

@ -25,12 +25,13 @@ impl DjangoProject {
}
pub fn initialize(&mut self, venv_path: Option<&str>) -> PyResult<()> {
let python_env =
self.env = Some(
PythonEnvironment::new(self.path.as_path(), venv_path).ok_or_else(|| {
PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
"Could not find Python environment",
)
})?;
})?,
);
Python::with_gil(|py| {
let sys = py.import("sys")?;
@ -40,13 +41,12 @@ impl DjangoProject {
py_path.call_method1("insert", (0, path_str))?;
}
for path in &python_env.sys_path {
if let Some(path_str) = path.to_str() {
py_path.call_method1("append", (path_str,))?;
}
}
self.env = Some(python_env);
let env = self.env.as_ref().ok_or_else(|| {
PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
"Internal error: Python environment missing after initialization",
)
})?;
env.activate(py)?;
match py.import("django") {
Ok(django) => {
@ -121,7 +121,7 @@ impl PythonEnvironment {
}
fn from_venv_prefix(prefix: &Path) -> Option<Self> {
#[cfg(not(windows))]
#[cfg(unix)]
let python_path = prefix.join("bin").join("python");
#[cfg(windows)]
let python_path = prefix.join("Scripts").join("python.exe");
@ -131,13 +131,12 @@ impl PythonEnvironment {
return None;
}
let mut sys_path = Vec::new();
#[cfg(not(windows))]
#[cfg(unix)]
let bin_dir = prefix.join("bin");
#[cfg(windows)]
let bin_dir = prefix.join("Scripts");
let mut sys_path = Vec::new();
sys_path.push(bin_dir); // Add bin/ or Scripts/
if let Some(site_packages) = Self::find_site_packages(prefix) {
@ -154,6 +153,19 @@ impl PythonEnvironment {
})
}
pub fn activate(&self, py: Python) -> PyResult<()> {
let sys = py.import("sys")?;
let py_path = sys.getattr("path")?;
for path in &self.sys_path {
if let Some(path_str) = path.to_str() {
py_path.call_method1("append", (path_str,))?;
}
}
Ok(())
}
fn from_system_python() -> Option<Self> {
let python_path = match which("python") {
Ok(p) => p,
@ -180,7 +192,7 @@ impl PythonEnvironment {
})
}
#[cfg(not(windows))]
#[cfg(unix)]
fn find_site_packages(prefix: &Path) -> Option<PathBuf> {
// Look for lib/pythonX.Y/site-packages
let lib_dir = prefix.join("lib");
@ -221,10 +233,13 @@ mod tests {
use std::fs;
use tempfile::tempdir;
mod env_discovery {
use super::*;
fn create_mock_venv(dir: &Path, version: Option<&str>) -> PathBuf {
let prefix = dir.to_path_buf();
#[cfg(not(windows))]
#[cfg(unix)]
{
let bin_dir = prefix.join("bin");
fs::create_dir_all(&bin_dir).unwrap();
@ -235,7 +250,6 @@ mod tests {
fs::create_dir_all(&py_version_dir).unwrap();
fs::create_dir_all(py_version_dir.join("site-packages")).unwrap();
}
#[cfg(windows)]
{
let bin_dir = prefix.join("Scripts");
@ -290,12 +304,13 @@ mod tests {
let venv_dir = tempdir().unwrap();
let venv_prefix = create_mock_venv(venv_dir.path(), None);
let env = PythonEnvironment::new(project_dir.path(), Some(venv_prefix.to_str().unwrap()))
let env =
PythonEnvironment::new(project_dir.path(), Some(venv_prefix.to_str().unwrap()))
.expect("Should find environment with explicit path");
assert_eq!(env.sys_prefix, venv_prefix);
#[cfg(not(windows))]
#[cfg(unix)]
{
assert!(env.python_path.ends_with("bin/python"));
assert!(env.sys_path.contains(&venv_prefix.join("bin")));
@ -303,7 +318,6 @@ mod tests {
.sys_path
.contains(&venv_prefix.join("lib/python3.9/site-packages")));
}
#[cfg(windows)]
{
assert!(env.python_path.ends_with("Scripts\\python.exe"));
@ -324,7 +338,8 @@ mod tests {
// Provide an invalid explicit path
let invalid_path = project_dir.path().join("non_existent_venv");
let env = PythonEnvironment::new(project_dir.path(), Some(invalid_path.to_str().unwrap()))
let env =
PythonEnvironment::new(project_dir.path(), Some(invalid_path.to_str().unwrap()))
.expect("Should fall through to project .venv");
// Should have found the one in the project dir
@ -344,9 +359,8 @@ mod tests {
assert_eq!(env.sys_prefix, venv_prefix);
#[cfg(not(windows))]
#[cfg(unix)]
assert!(env.python_path.ends_with("bin/python"));
#[cfg(windows)]
assert!(env.python_path.ends_with("Scripts\\python.exe"));
}
@ -468,7 +482,7 @@ mod tests {
}
#[test]
#[cfg(not(windows))] // Test specific site-packages structure on Unix-like systems
#[cfg(unix)]
fn test_unix_site_packages_discovery() {
let venv_dir = tempdir().unwrap();
let prefix = venv_dir.path();
@ -535,13 +549,233 @@ mod tests {
// Create prefix dir but not the binary
fs::create_dir_all(prefix).unwrap();
#[cfg(not(windows))]
#[cfg(unix)]
fs::create_dir_all(prefix.join("bin")).unwrap();
#[cfg(windows)]
fs::create_dir_all(prefix.join("Scripts")).unwrap();
let result = PythonEnvironment::from_venv_prefix(prefix);
assert!(result.is_none());
}
}
mod env_activation {
use super::*;
fn get_sys_path(py: Python) -> PyResult<Vec<String>> {
let sys = py.import("sys")?;
let py_path = sys.getattr("path")?;
py_path.extract::<Vec<String>>()
}
fn create_test_env(sys_paths: Vec<PathBuf>) -> PythonEnvironment {
PythonEnvironment {
// Dummy values for fields not directly used by activate
python_path: PathBuf::from("dummy/bin/python"),
sys_prefix: PathBuf::from("dummy"),
sys_path: sys_paths,
}
}
#[test]
fn test_activate_appends_paths() -> PyResult<()> {
let temp_dir = tempdir().unwrap();
let path1 = temp_dir.path().join("scripts");
let path2 = temp_dir.path().join("libs");
fs::create_dir_all(&path1).unwrap();
fs::create_dir_all(&path2).unwrap();
let test_env = create_test_env(vec![path1.clone(), path2.clone()]);
Python::with_gil(|py| {
let initial_sys_path = get_sys_path(py)?;
let initial_len = initial_sys_path.len();
test_env.activate(py)?;
let final_sys_path = get_sys_path(py)?;
assert_eq!(
final_sys_path.len(),
initial_len + 2,
"Should have added 2 paths"
);
// Check that the *exact* paths were appended in the correct order
assert_eq!(
final_sys_path.get(initial_len).unwrap(),
path1.to_str().expect("Path 1 should be valid UTF-8")
);
assert_eq!(
final_sys_path.get(initial_len + 1).unwrap(),
path2.to_str().expect("Path 2 should be valid UTF-8")
);
Ok(())
})
}
#[test]
fn test_activate_empty_sys_path() -> PyResult<()> {
let test_env = create_test_env(vec![]);
Python::with_gil(|py| {
let initial_sys_path = get_sys_path(py)?;
test_env.activate(py)?;
let final_sys_path = get_sys_path(py)?;
assert_eq!(
final_sys_path, initial_sys_path,
"sys.path should remain unchanged for empty env.sys_path"
);
Ok(())
})
}
#[test]
fn test_activate_with_non_existent_paths() -> PyResult<()> {
let temp_dir = tempdir().unwrap();
// These paths do not actually exist on the filesystem
let path1 = temp_dir.path().join("non_existent_dir");
let path2 = temp_dir.path().join("another_missing/path");
let test_env = create_test_env(vec![path1.clone(), path2.clone()]);
Python::with_gil(|py| {
let initial_sys_path = get_sys_path(py)?;
let initial_len = initial_sys_path.len();
test_env.activate(py)?;
let final_sys_path = get_sys_path(py)?;
assert_eq!(
final_sys_path.len(),
initial_len + 2,
"Should still add 2 paths even if they don't exist"
);
assert_eq!(
final_sys_path.get(initial_len).unwrap(),
path1.to_str().unwrap()
);
assert_eq!(
final_sys_path.get(initial_len + 1).unwrap(),
path2.to_str().unwrap()
);
Ok(())
})
}
#[test]
#[cfg(unix)]
fn test_activate_skips_non_utf8_paths_unix() -> PyResult<()> {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
let temp_dir = tempdir().unwrap();
let valid_path = temp_dir.path().join("valid_dir");
fs::create_dir(&valid_path).unwrap();
// Create a PathBuf from invalid UTF-8 bytes
let invalid_bytes = b"invalid_\xff_utf8";
let os_str = OsStr::from_bytes(invalid_bytes);
let non_utf8_path = PathBuf::from(os_str);
// Sanity check: ensure this path *cannot* be converted to str
assert!(
non_utf8_path.to_str().is_none(),
"Path should not be convertible to UTF-8 str"
);
let test_env = create_test_env(vec![valid_path.clone(), non_utf8_path.clone()]);
Python::with_gil(|py| {
let initial_sys_path = get_sys_path(py)?;
let initial_len = initial_sys_path.len();
test_env.activate(py)?;
let final_sys_path = get_sys_path(py)?;
// Should have added only the valid path
assert_eq!(
final_sys_path.len(),
initial_len + 1,
"Should only add valid UTF-8 paths"
);
assert_eq!(
final_sys_path.get(initial_len).unwrap(),
valid_path.to_str().unwrap()
);
// Check that the invalid path string representation is NOT present
let invalid_path_lossy = non_utf8_path.to_string_lossy();
assert!(
!final_sys_path
.iter()
.any(|p| p.contains(&*invalid_path_lossy)),
"Non-UTF8 path should not be present in sys.path"
);
Ok(())
})
}
#[test]
#[cfg(windows)] // Test specific behavior for invalid UTF-16/WTF-8 on Windows
fn test_activate_skips_non_utf8_paths_windows() -> PyResult<()> {
use std::ffi::OsString;
use std::os::windows::ffi::OsStringExt;
let temp_dir = tempdir().unwrap();
let valid_path = temp_dir.path().join("valid_dir");
// No need to create dir, just need the PathBuf
// Create an OsString from invalid UTF-16 (a lone surrogate)
// D800 is a high surrogate, not valid unless paired with a low surrogate.
let invalid_wide: Vec<u16> = vec![
'i' as u16, 'n' as u16, 'v' as u16, 'a' as u16, 'l' as u16, 'i' as u16, 'd' as u16,
'_' as u16, 0xD800, '_' as u16, 'w' as u16, 'i' as u16, 'd' as u16, 'e' as u16,
];
let os_string = OsString::from_wide(&invalid_wide);
let non_utf8_path = PathBuf::from(os_string);
// Sanity check: ensure this path *cannot* be converted to a valid UTF-8 str
assert!(
non_utf8_path.to_str().is_none(),
"Path with lone surrogate should not be convertible to UTF-8 str"
);
let test_env = create_test_env(vec![valid_path.clone(), non_utf8_path.clone()]);
Python::with_gil(|py| {
let initial_sys_path = get_sys_path(py)?;
let initial_len = initial_sys_path.len();
test_env.activate(py)?;
let final_sys_path = get_sys_path(py)?;
// Should have added only the valid path
assert_eq!(
final_sys_path.len(),
initial_len + 1,
"Should only add paths convertible to valid UTF-8"
);
assert_eq!(
final_sys_path.get(initial_len).unwrap(),
valid_path.to_str().unwrap()
);
// Check that the invalid path string representation is NOT present
let invalid_path_lossy = non_utf8_path.to_string_lossy();
assert!(
!final_sys_path
.iter()
.any(|p| p.contains(&*invalid_path_lossy)),
"Non-UTF8 path (from invalid wide chars) should not be present in sys.path"
);
Ok(())
})
}
}
}

View file

@ -16,3 +16,6 @@ tokio = { workspace = true }
tower-lsp-server = { workspace = true }
percent-encoding = "2.3"
[build-dependencies]
pyo3-build-config = { workspace = true, features = ["resolve-config"] }

View file

@ -0,0 +1,45 @@
/// Build script to configure linking against the Python library.
///
/// This script is necessary when the crate's code, particularly tests
/// that interact with the Python interpreter via `pyo3` (e.g., using
/// `Python::with_gil`, importing modules, calling Python functions),
/// needs symbols from the Python C API at link time.
///
/// It uses `pyo3-build-config` to detect the Python installation and
/// prints the required `cargo:rustc-link-search` and `cargo:rustc-link-lib`
/// directives to Cargo, enabling the linker to find and link against the
/// appropriate Python library (e.g., libpythonX.Y.so).
///
/// It also adds an RPATH linker argument on Unix-like systems so the
/// resulting test executable can find the Python library at runtime.
///
/// Note: Each crate whose test target requires Python linking needs its
/// own `build.rs` with this logic.
fn main() {
println!("cargo:rerun-if-changed=build.rs");
// Set up #[cfg] flags first (useful for conditional compilation)
pyo3_build_config::use_pyo3_cfgs();
// Get the Python interpreter configuration directly
let config = pyo3_build_config::get();
// Add the library search path if available
if let Some(lib_dir) = &config.lib_dir {
println!("cargo:rustc-link-search=native={}", lib_dir);
// Add RPATH linker argument for Unix-like systems (Linux, macOS)
// This helps the test executable find the Python library at runtime.
#[cfg(not(windows))]
println!("cargo:rustc-link-arg=-Wl,-rpath,{}", lib_dir);
} else {
println!("cargo:warning=Python library directory not found in config.");
}
// Add the library link directive if available
if let Some(lib_name) = &config.lib_name {
println!("cargo:rustc-link-lib=dylib={}", lib_name);
} else {
println!("cargo:warning=Python library name not found in config.");
}
}

View file

@ -29,13 +29,12 @@ fn uri_to_pathbuf(uri: &Uri) -> Option<PathBuf> {
// Decode the percent-encoded path string
let decoded_path_cow = percent_decode_str(encoded_path_str).decode_utf8_lossy();
let path_str = decoded_path_cow.as_ref();
#[cfg(unix)]
let path_str = decoded_path_cow.as_ref();
#[cfg(windows)]
let path_str = {
// Remove leading '/' for paths like /C:/...
path_str.strip_prefix('/').unwrap_or(path_str)
};
let path_str = path_str.strip_prefix('/').unwrap_or(path_str);
Some(PathBuf::from(path_str))
}

View file

@ -18,3 +18,6 @@ tokio = { workspace = true }
tower-lsp-server = { workspace = true }
clap = { version = "4.5", features = ["derive"] }
[build-dependencies]
pyo3-build-config = { workspace = true, features = ["resolve-config"] }

45
crates/djls/build.rs Normal file
View file

@ -0,0 +1,45 @@
/// Build script to configure linking against the Python library.
///
/// This script is necessary when the crate's code, particularly tests
/// that interact with the Python interpreter via `pyo3` (e.g., using
/// `Python::with_gil`, importing modules, calling Python functions),
/// needs symbols from the Python C API at link time.
///
/// It uses `pyo3-build-config` to detect the Python installation and
/// prints the required `cargo:rustc-link-search` and `cargo:rustc-link-lib`
/// directives to Cargo, enabling the linker to find and link against the
/// appropriate Python library (e.g., libpythonX.Y.so).
///
/// It also adds an RPATH linker argument on Unix-like systems so the
/// resulting test executable can find the Python library at runtime.
///
/// Note: Each crate whose test target requires Python linking needs its
/// own `build.rs` with this logic.
fn main() {
println!("cargo:rerun-if-changed=build.rs");
// Set up #[cfg] flags first (useful for conditional compilation)
pyo3_build_config::use_pyo3_cfgs();
// Get the Python interpreter configuration directly
let config = pyo3_build_config::get();
// Add the library search path if available
if let Some(lib_dir) = &config.lib_dir {
println!("cargo:rustc-link-search=native={}", lib_dir);
// Add RPATH linker argument for Unix-like systems (Linux, macOS)
// This helps the test executable find the Python library at runtime.
#[cfg(not(windows))]
println!("cargo:rustc-link-arg=-Wl,-rpath,{}", lib_dir);
} else {
println!("cargo:warning=Python library directory not found in config.");
}
// Add the library link directive if available
if let Some(lib_name) = &config.lib_name {
println!("cargo:rustc-link-lib=dylib={}", lib_name);
} else {
println!("cargo:warning=Python library name not found in config.");
}
}